Repository: RayVentura/ShortGPT
Branch: stable
Commit: 3df4e0f7a422
Files: 152
Total size: 399.2 KB
Directory structure:
gitextract_zk_2yim7/
├── .database/
│ └── template_asset_db.json
├── .github/
│ ├── CHANGE_LOG.md
│ ├── CODEOWNERS
│ ├── CODE_OF_CONDUCT.md
│ ├── CONTRIBUTING.md
│ ├── FUNDING.yml
│ ├── ISSUE_TEMPLATE/
│ │ ├── bug_report.yaml
│ │ ├── feature_request.yaml
│ │ └── question.yaml
│ ├── SECURITY.md
│ ├── config.yml
│ ├── issue_label_bot.yaml
│ ├── pull_request_template.md
│ ├── settings.yml
│ └── workflows/
│ └── generate_release-changelog.yaml
├── .gitignore
├── CHANGES.txt
├── Dockerfile
├── LICENSE
├── README-Docker.md
├── README.md
├── docs/
│ ├── .gitignore
│ ├── README.md
│ ├── babel.config.js
│ ├── docs/
│ │ ├── api-key-manager.mdx
│ │ ├── asset-database.mdx
│ │ ├── content-translation-engine.mdx
│ │ ├── content-video-engine.mdx
│ │ ├── facts-short-engine.mdx
│ │ ├── getting-started.mdx
│ │ └── how-to-install.mdx
│ ├── docusaurus.config.js
│ ├── package.json
│ ├── plugins/
│ │ ├── my-loaders/
│ │ │ └── index.js
│ │ └── tailwind-loader/
│ │ └── index.js
│ ├── sidebars.js
│ ├── src/
│ │ ├── components/
│ │ │ └── Home.js
│ │ ├── css/
│ │ │ ├── custom.css
│ │ │ └── fragments.css
│ │ └── pages/
│ │ └── index.js
│ └── tailwind.config.js
├── gui/
│ ├── asset_components.py
│ ├── content_automation_ui.py
│ ├── gui_gradio.py
│ ├── ui_abstract_base.py
│ ├── ui_abstract_component.py
│ ├── ui_components_html.py
│ ├── ui_tab_asset_library.py
│ ├── ui_tab_config.py
│ ├── ui_tab_short_automation.py
│ ├── ui_tab_video_automation.py
│ └── ui_tab_video_translation.py
├── installation-notes.md
├── requirements.txt
├── runShortGPT.py
├── runShortGPTColab.py
├── setup.py
├── shortGPT/
│ ├── __init__.py
│ ├── api_utils/
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── eleven_api.py
│ │ ├── image_api.py
│ │ └── pexels_api.py
│ ├── audio/
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── audio_duration.py
│ │ ├── audio_utils.py
│ │ ├── edge_voice_module.py
│ │ ├── eleven_voice_module.py
│ │ └── voice_module.py
│ ├── config/
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── api_db.py
│ │ ├── asset_db.py
│ │ ├── config.py
│ │ ├── languages.py
│ │ └── path_utils.py
│ ├── database/
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── content_data_manager.py
│ │ ├── content_database.py
│ │ └── db_document.py
│ ├── editing_framework/
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── core_editing_engine.py
│ │ ├── editing_engine.py
│ │ ├── editing_steps/
│ │ │ ├── __init__.py
│ │ │ ├── add_background_video.json
│ │ │ ├── add_background_voiceover.json
│ │ │ ├── add_voiceover.json
│ │ │ ├── background_music.json
│ │ │ ├── crop_1920x1080_to_short.json
│ │ │ ├── extract_audio.json
│ │ │ ├── insert_audio.json
│ │ │ ├── make_caption.json
│ │ │ ├── make_caption_arabic.json
│ │ │ ├── make_caption_arabic_landscape.json
│ │ │ ├── make_caption_landscape.json
│ │ │ ├── show_reddit_image.json
│ │ │ ├── show_top_image.json
│ │ │ ├── show_watermark.json
│ │ │ └── subscribe_animation.json
│ │ ├── flows/
│ │ │ ├── __init__.py
│ │ │ └── build_reddit_image.json
│ │ └── rendering_logger.py
│ ├── editing_utils/
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── captions.py
│ │ ├── editing_images.py
│ │ └── handle_videos.py
│ ├── engine/
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── abstract_content_engine.py
│ │ ├── content_short_engine.py
│ │ ├── content_translation_engine.py
│ │ ├── content_video_engine.py
│ │ ├── facts_short_engine.py
│ │ ├── multi_language_translation_engine.py
│ │ └── reddit_short_engine.py
│ ├── gpt/
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── facts_gpt.py
│ │ ├── gpt_chat_video.py
│ │ ├── gpt_editing.py
│ │ ├── gpt_translate.py
│ │ ├── gpt_utils.py
│ │ ├── gpt_voice.py
│ │ ├── gpt_yt.py
│ │ └── reddit_gpt.py
│ ├── prompt_templates/
│ │ ├── __init__.py
│ │ ├── chat_video_edit_script.yaml
│ │ ├── chat_video_script.yaml
│ │ ├── editing_generate_images.yaml
│ │ ├── editing_generate_videos.yaml
│ │ ├── facts_generator.yaml
│ │ ├── facts_subjects_generation.yaml
│ │ ├── reddit_extract_question.yaml
│ │ ├── reddit_filter_realistic.yaml
│ │ ├── reddit_generate_question.yaml
│ │ ├── reddit_generate_script.yaml
│ │ ├── reddit_story_filter.yaml
│ │ ├── reddit_username.yaml
│ │ ├── translate_content.yaml
│ │ ├── voice_identify_gender.yaml
│ │ └── yt_title_description.yaml
│ ├── tracking/
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── api_tracking.py
│ │ └── cost_analytics.py
│ └── utils/
│ ├── cli.py
│ └── requirements.py
└── videos/
└── .gitignore
================================================
FILE CONTENTS
================================================
================================================
FILE: .database/template_asset_db.json
================================================
{
"asset_collection": {
"1": {
"_id": "local_assets",
"white_reddit_template": {
"path": "public/white_reddit_template.png",
"type": "image",
"ts": "2023-07-03 19:41:55",
"required": true
},
"subscribe-animation": {
"path": "public/subscribe-animation.mp4",
"type": "video",
"ts": "2023-07-03 21:37:53",
"required": true
}
},
"2": {
"_id": "remote_assets",
"Music joakim karud dreams": {
"type": "background music",
"url": "https://www.youtube.com/watch?v=p56gqDhUYbU",
"ts": "2023-07-05 04:35:03"
},
"Music dj quads": {
"type": "background music",
"url": "https://www.youtube.com/watch?v=uUu1NcSHg2E",
"ts": "2023-07-05 05:03:44"
},
"Car race gameplay": {
"type": "background video",
"url": "https://www.youtube.com/watch?v=gBsJA8tCeyc",
"ts": "2023-07-04 23:07:44"
},
"Minecraft jumping circuit": {
"url": "https://www.youtube.com/watch?v=Pt5_GSKIWQM",
"type": "background video",
"ts": "2023-07-07 04:13:36"
},
"Ski gameplay": {
"url": "https://www.youtube.com/watch?v=8ao1NAOVKTU",
"type": "background video",
"ts": "2023-07-07 04:54:16"
}
}
}
}
================================================
FILE: .github/CHANGE_LOG.md
================================================
# Changelog
All notable changes to this project will be documented in this file.
## [Unreleased]
Upcoming changes.
### Added
### Changed
### Removed
## [0.0.1] - YYYY-MM-DD
Initial Release.
### Added
- What was added.
[Unreleased]: /
[0.0.1]: /v0.0.1
================================================
FILE: .github/CODEOWNERS
================================================
# These owners will be the default owners for everything in
# the repo. Unless a later match takes precedence,
# @USER will be requested for
# review when someone opens a pull request.
# To add more owners, list them after @RayVentura on the line below.
* @RayVentura
================================================
FILE: .github/CODE_OF_CONDUCT.md
================================================
# Contributor Covenant Code of Conduct
## Our Pledge
We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, religion, or sexual identity
and orientation.
We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.
## Our Standards
Examples of behavior that contributes to a positive environment for our
community include:
* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes,
and learning from the experience
* Focusing on what is best not just for us as individuals, but for the
overall community
Examples of unacceptable behavior include:
* The use of sexualized language or imagery, and sexual attention or
advances of any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email
address, without their explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Enforcement Responsibilities
Community leaders are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive,
or harmful.
Community leaders have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct, and will communicate reasons for moderation
decisions when appropriate.
## Scope
This Code of Conduct applies within all community spaces, and also applies when
an individual is officially representing the community in public spaces.
Examples of representing our community include using an official e-mail address,
posting via an official social media account, or acting as an appointed
representative at an online or offline event.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement.
All complaints will be reviewed and investigated promptly and fairly.
All community leaders are obligated to respect the privacy and security of the
reporter of any incident.
## Enforcement Guidelines
Community leaders will follow these Community Impact Guidelines in determining
the consequences for any action they deem in violation of this Code of Conduct:
### 1. Correction
**Community Impact**: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community.
**Consequence**: A private, written warning from community leaders, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate. A public apology may be requested.
### 2. Warning
**Community Impact**: A violation through a single incident or series
of actions.
**Consequence**: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period of time. This
includes avoiding interactions in community spaces as well as external channels
like social media. Violating these terms may lead to a temporary or
permanent ban.
### 3. Temporary Ban
**Community Impact**: A serious violation of community standards, including
sustained inappropriate behavior.
**Consequence**: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time. No public or
private interaction with the people involved, including unsolicited interaction
with those enforcing the Code of Conduct, is allowed during this period.
Violating these terms may lead to a permanent ban.
### 4. Permanent Ban
**Community Impact**: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior, harassment of an
individual, or aggression toward or disparagement of classes of individuals.
**Consequence**: A permanent ban from any sort of public interaction within
the community.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.0, available at
https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
Community Impact Guidelines were inspired by [Mozilla's code of conduct
enforcement ladder](https://github.com/mozilla/diversity).
[homepage]: https://www.contributor-covenant.org
For answers to common questions about this code of conduct, see the FAQ at
https://www.contributor-covenant.org/faq. Translations are available at
https://www.contributor-covenant.org/translations.
================================================
FILE: .github/CONTRIBUTING.md
================================================
🌟💻📚
## Contributing
There are many exciting ways to contribute to ShortGPT, our AI automated content creation framework. 👏
See below for everything you can do and the processes to follow for each contribution method. Note that no matter how you contribute, your participation is governed by our ✨[Code of Conduct](CODE_OF_CONDUCT.md)✨.
## 🛠️ Make changes to the code or docs
- 🍴 Fork the project,
- 💡 make your changes,
- 🔀 and send a pull request! 🙌
Make sure you read and follow the instructions in the [pull request template](pull_request_template.md). And note that all participation in this project (including code submissions) is governed by our ✨[Code of Conduct](CODE_OF_CONDUCT.md)✨.
## 🐞📝 Submit bug reports or feature requests
Just use the GitHub issue tracker to submit your bug reports and feature requests. We appreciate your feedback! 🐛🔧
Let's make ShortGPT even better together! 🚀❤️
================================================
FILE: .github/FUNDING.yml
================================================
# These are supported funding model platforms
github: rayventura
patreon: # Replace with a single Patreon username
open_collective: # Replace with a single Open Collective username
ko_fi: rayventura
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
liberapay: # Replace with a single Liberapay username
issuehunt: # Replace with a single IssueHunt username
otechie: # Replace with a single Otechie username
lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.yaml
================================================
name: 🐛 Bug Report
description: File a bug report
title: '🐛 [Bug]: '
labels: ['bug']
body:
- type: markdown
attributes:
value: |
Thanks for taking the time to fill out this bug report!
- type: textarea
id: what-happened
attributes:
label: What happened?
description: Describe the issue here.
placeholder: Tell us what you see!
validations:
required: true
- type: dropdown
id: browsers
attributes:
label: What type of browser are you seeing the problem on?
multiple: true
options:
- Firefox
- Chrome
- Safari
- Microsoft Edge
validations:
required: true
- type: dropdown
id: operating-systems
attributes:
label: What type of Operating System are you seeing the problem on?
multiple: true
options:
- Linux
- Windows
- Mac
- Google Colab
- Other
validations:
required: true
- type: input
id: python-version
attributes:
label: Python Version
description: What version of Python are you using?
placeholder: e.g. Python 3.9.0
validations:
required: true
- type: input
id: application-version
attributes:
label: Application Version
description: What version of the application are you using?
placeholder: e.g. v1.2.3
validations:
required: true
- type: textarea
id: expected-behavior
attributes:
label: Expected Behavior
description: What did you expect to happen?
placeholder: What did you expect?
validations:
required: true
- type: textarea
id: error-message
attributes:
label: Error Message
description: What error message did you receive?
placeholder:
render: shell
validations:
required: false
- type: textarea
id: logs
attributes:
label: Code to produce this issue.
description: Please copy and paste any relevant code to re-produce this issue.
render: shell
- type: textarea
id: screenshots-assets
attributes:
label: Screenshots/Assets/Relevant links
description: If applicable, add screenshots, assets or any relevant links that can help understand the issue.
placeholder: Provide any relevant material here
validations:
required: false
================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.yaml
================================================
name: ✨ Feature request
description: Suggest a feature / idea for this project
title: '✨ [Feature Request / Suggestion]: '
labels: ['feature']
body:
- type: markdown
attributes:
value: |
We appreciate your feedback on how to improve this project. Please be sure to include as much detail & any resources as possible!
- type: textarea
id: Suggestion
attributes:
label: Suggestion / Feature Request
description: Describe the feature(s) you would like to see added.
placeholder: Tell us your suggestion
validations:
required: true
- type: textarea
id: why-usage
attributes:
label: Why would this be useful?
description: Describe why this feature would be useful.
placeholder: Tell us why this would be useful to have this feature
validations:
required: false
- type: textarea
id: screenshots-assets
attributes:
label: Screenshots/Assets/Relevant links
description: If applicable, add screenshots, assets or any relevant links that can help understand the issue.
placeholder: Provide any relevant material here
validations:
required: false
================================================
FILE: .github/ISSUE_TEMPLATE/question.yaml
================================================
name: ❓ Question
description: Ask a question about this project
title: '❓ [Question]: '
labels: ['question']
body:
- type: markdown
attributes:
value: |
We appreciate your interest in this project. Please be sure to include as much detail & context about your question as possible!
- type: textarea
id: Question
attributes:
label: Your Question
description: Describe your question in detail.
validations:
required: true
================================================
FILE: .github/SECURITY.md
================================================
# Security Policy
## Supported Versions
| Version | Supported |
| ------- | ------------------ |
| 0.0.x | :x: |
## 🔒️ Reporting a Vulnerability
If you have identified a security vulnerability in the system or product, please contact `RayVentura` with your findings. We strongly recommend using our `PGP key` to prevent this information from falling into the wrong hands.
### Disclosure Policy
Upon receipt of a security report the following steps will be taken:
- Acknowledge your report within 48 hours, and provide a further, more detailed update within 48 hours.
- Confirm the problem and determine the affected versions
- Keep you informed of the progress towards resolving the problem and notify you when the vulnerability has been fixed.
- Audit code to find any potential similar problems.
- Prepare fixes for all releases still under maintenance. These fixes will be released as fast as possible.
- Handle your report with strict confidentiality, and not pass on your personal details to third parties without your permission.
Whilst the issue is under investigation
- **Do** provide as much information as possible.
- **Do not** exploit the vulnerability or problem you have discovered.
- **Do not** reveal the problem to others until it has been resolved.
================================================
FILE: .github/config.yml
================================================
# Configuration for new-issue-welcome - https://github.com/behaviorbot/new-issue-welcome
# Comment to be posted on first-time issues
newIssueWelcomeComment: >
Thanks for opening your first issue! Reports like these help improve the project!
# Configuration for new-pr-welcome - https://github.com/behaviorbot/new-pr-welcome
# Comment to be posted on PRs from first-time contributors in your repository
newPRWelcomeComment: >
Thanks for opening this pull request!
# Configuration for first-pr-merge - https://github.com/behaviorbot/first-pr-merge
# Comment to be posted on pull requests merged by a first-time user
firstPRMergeComment: >
Congrats on merging your first pull request!
# The keyword to find for Todo Bot issue
todo:
keyword: '@todo'
================================================
FILE: .github/issue_label_bot.yaml
================================================
label-alias:
bug: 'Type: Bug'
feature_request: 'Type: Feature'
question: 'Type: Question'
================================================
FILE: .github/pull_request_template.md
================================================
## Proposed changes
Describe the big picture of your changes here to communicate to the maintainers why we should accept this pull request. If it fixes a bug or resolves a feature request, be sure to link to that issue. 👀🔧
## Types of changes
What types of changes does your code introduce to this project?
_Put an `x` in the boxes that apply_ 😄🚀
- [ ] Bugfix (non-breaking change which fixes an issue) 🐛
- [ ] New feature (non-breaking change which adds functionality) ✨
- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) 💥
- [ ] Documentation Update (if none of the other choices apply) 📖
## Checklist
_Put an `x` in the boxes that apply. You can also fill these out after creating the PR. If you're unsure about any of them, don't hesitate to ask. We're here to help! This is simply a reminder of what we are going to look for before merging your code._ ✅
- [ ] I have read the CONTRIBUTING.md 📚
- [ ] I have added tests that prove my fix is effective or that my feature works ✅✔️
- [ ] I have added necessary documentation (if appropriate) 📝
## Further comments
If this is a relatively large or complex change, kick off the discussion by explaining why you chose the solution you did and what alternatives you considered, etc... 💡❓
## References and related issues (e.g. #1234)
N/A 📌
================================================
FILE: .github/settings.yml
================================================
repository:
# See https://developer.github.com/v3/repos/#edit for all available settings.
# The name of the repository. Changing this will rename the repository
#name: repo-name
# A short description of the repository that will show up on GitHub
#description: description of repo
# A URL with more information about the repository
#homepage: https://example.github.io/
# A comma-separated list of topics to set on the repository
#topics: project, template, project-template
# Either `true` to make the repository private, or `false` to make it public.
#private: false
# Either `true` to enable issues for this repository, `false` to disable them.
has_issues: true
# Either `true` to enable the wiki for this repository, `false` to disable it.
has_wiki: true
# Either `true` to enable downloads for this repository, `false` to disable them.
#has_downloads: true
# Updates the default branch for this repository.
default_branch: stable
# Either `true` to allow squash-merging pull requests, or `false` to prevent
# squash-merging.
#allow_squash_merge: true
# Either `true` to allow merging pull requests with a merge commit, or `false`
# to prevent merging pull requests with merge commits.
#allow_merge_commit: true
# Either `true` to allow rebase-merging pull requests, or `false` to prevent
# rebase-merging.
#allow_rebase_merge: true
# Labels: define labels for Issues and Pull Requests
labels:
- name: 'Type: Bug'
color: e80c0c
description: Something isn't working as expected.
- name: 'Type: Enhancement'
color: 54b2ff
description: Suggest an improvement for an existing feature.
- name: 'Type: Feature'
color: 54b2ff
description: Suggest a new feature.
- name: 'Type: Security'
color: fbff00
description: A problem or enhancement related to a security issue.
- name: 'Type: Question'
color: 9309ab
description: Request for information.
- name: 'Type: Test'
color: ce54e3
description: A problem or enhancement related to a test.
- name: 'Status: Awaiting Review'
color: 24d15d
description: Ready for review.
- name: 'Status: WIP'
color: 07b340
description: Currently being worked on.
- name: 'Status: Waiting'
color: 38C968
description: Waiting on something else to be ready.
- name: 'Status: Stale'
color: 66b38a
description: Has had no activity for some time.
- name: 'Duplicate'
color: EB862D
description: Duplicate of another issue.
- name: 'Invalid'
color: faef50
description: This issue doesn't seem right.
- name: 'Priority: High +'
color: ff008c
description: Task is considered higher-priority.
- name: 'Priority: Low -'
color: 690a34
description: Task is considered lower-priority.
- name: 'Documentation'
color: 2fbceb
description: An issue/change with the documentation.
- name: "Won't fix"
color: C8D9E6
description: Reported issue is working as intended.
- name: '3rd party issue'
color: e88707
description: This issue might be caused by a 3rd party script/package/other reasons
- name: 'Os: Windows'
color: AEB1C2
description: Is Windows-specific
- name: 'Os: Mac'
color: AEB1C2
description: Is Mac-specific
- name: 'Os: Linux'
color: AEB1C2
description: Is Linux-specific
- name: 'Os: Google Colab'
color: AEB1C2
description: Is Google Colab-specific
#
#
# # Collaborators: give specific users access to this repository.
# # See https://developer.github.com/v3/repos/collaborators/#add-user-as-a-collaborator for available options
# collaborators:
# # - username: bkeepers
# # permission: push
# # - username: hubot
# # permission: pull
# # Note: `permission` is only valid on organization-owned repositories.
# # The permission to grant the collaborator. Can be one of:
# # * `pull` - can pull, but not push to or administer this repository.
# # * `push` - can pull and push, but not administer this repository.
# # * `admin` - can pull, push and administer this repository.
# # * `maintain` - Recommended for project managers who need to manage the repository without access to sensitive or destructive actions.
# # * `triage` - Recommended for contributors who need to proactively manage issues and pull requests without write access.
# # See https://developer.github.com/v3/teams/#add-or-update-team-repository for available options
# teams:
# - name: core
# # The permission to grant the team. Can be one of:
# # * `pull` - can pull, but not push to or administer this repository.
# # * `push` - can pull and push, but not administer this repository.
# # * `admin` - can pull, push and administer this repository.
# # * `maintain` - Recommended for project managers who need to manage the repository without access to sensitive or destructive actions.
# # * `triage` - Recommended for contributors who need to proactively manage issues and pull requests without write access.
# permission: admin
# - name: docs
# permission: push
# branches:
# - name: master
# # https://developer.github.com/v3/repos/branches/#update-branch-protection
# # Branch Protection settings. Set to null to disable
# protection:
# # Required. Require at least one approving review on a pull request, before merging. Set to null to disable.
# required_pull_request_reviews:
# # The number of approvals required. (1-6)
# required_approving_review_count: 1
# # Dismiss approved reviews automatically when a new commit is pushed.
# dismiss_stale_reviews: true
# # Blocks merge until code owners have reviewed.
# require_code_owner_reviews: true
# # Specify which users and teams can dismiss pull request reviews. Pass an empty dismissal_restrictions object to disable. User and team dismissal_restrictions are only available for organization-owned repositories. Omit this parameter for personal repositories.
# dismissal_restrictions:
# users: []
# teams: []
# # Required. Require status checks to pass before merging. Set to null to disable
# required_status_checks:
# # Required. Require branches to be up to date before merging.
# strict: true
# # Required. The list of status checks to require in order to merge into this branch
# contexts: []
# # Required. Enforce all configured restrictions for administrators. Set to true to enforce required status checks for repository administrators. Set to null to disable.
# enforce_admins: true
# # Prevent merge commits from being pushed to matching branches
# required_linear_history: true
# # Required. Restrict who can push to this branch. Team and user restrictions are only available for organization-owned repositories. Set to null to disable.
# restrictions:
# apps: []
# users: []
# teams: []
================================================
FILE: .github/workflows/generate_release-changelog.yaml
================================================
name: Create Release
on:
push:
tags:
- 'v*' # Push events to matching v*, i.e. v1.0, v20.15.10
jobs:
build:
name: Create Release
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Changelog
uses: Bullrich/generate-release-changelog@master
id: Changelog
env:
REPO: ${{ github.repository }}
- name: Create Release
id: create_release
uses: actions/create-release@latest
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # This token is provided by Actions, you do not need to create your own token
with:
tag_name: ${{ github.ref }}
release_name: Release ${{ github.ref }}
body: |
${{ steps.Changelog.outputs.changelog }}
draft: false
prerelease: false
================================================
FILE: .gitignore
================================================
!*.py
!*.json
!*.yaml
!*.template
*.pyc
**/__pycache__/
test.py
public/*
!public/white_reddit_template.png
!public/subscribe-animation.mp4
z_doc/*
z_other/*
videos/*
.logs/
.editing_assets/*
.database/api_db.json
.database/content_db.json
.database/asset_db.json
flagged/
.vscode
.env
ShortGPT.egg-info
dist
build
setup
test.ipynb
.venv/
MANIFEST.in
schema.json
video.mp4
Untitled-1.ipynb
================================================
FILE: CHANGES.txt
================================================
# CHANGES
## Version 0.1.31
- Fixing an issue in AssetDatabase where it was copying a nonexistent asset template file
## Version 0.1.3
- Requiring a youtube url as the subscribe animation url in the EditingStep.ADD_SUBSCRIBE_ANIMATION step.
- Adding a default subscribe animation YouTube link, shipped by default in the AssetDatabase
- Making path imports relative for gpt prompts and editing blocks and flows.
## Version 0.1.2
- Improving logs in content engines
## Version 0.1.1
- Adding AssetType in AssetDatabase
- Adding ApiProvider in api_db
- Fixing pip library missing the editing_framework and prompt_template modules
## Version 0.1.0
- Fixing the AssetDatabase when it's empty
## Version 0.0.2
- Implemented the content_translation_engine; a multilingual video dubbing content engine. The source can be found at shortGPT/engine/content_translation_engine.py.
- Implemented the new EdgeTTS voice module; it can be found at shortGPT/audio/edge_voice_module.py.
- Added documentation which can be found under docs/.
================================================
FILE: Dockerfile
================================================
# Use an official Python runtime as the parent image
FROM python:3.10-slim-bullseye
RUN apt-get update && apt-get install -y ffmpeg
# Set the working directory in the container to /app
WORKDIR /app
# Install any Python packages specified in requirements.txt
# Copy requirements file
COPY requirements.txt .
# Install dependencies
RUN pip install -r requirements.txt
# Copy the local package directory content into the container at /app
COPY . /app
EXPOSE 31415
# Define any environment variables
# ENV KEY Value
# Print environment variables (for debugging purposes, you can remove this line if not needed)
RUN ["printenv"]
# Run Python script when the container launches
CMD ["python", "-u", "./runShortGPT.py"]
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2024 Ray Ventura
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README-Docker.md
================================================
# To run ShortGPT docker:
First make a .env file with the API keys like this:
```bash
GEMINI_API_KEY=put_your_gemini_api_key_here
OPENAI_API_KEY=sk-_put_your_openai_api_key_here
ELEVENLABS_API_KEY=put_your_eleven_labs_api_key_here
PEXELS_API_KEY=put_your_pexels_api_key_here
```
To run Dockerfile do this:
```bash
docker build -t short_gpt_docker:latest .
docker run -p 31415:31415 --env-file .env short_gpt_docker:latest
```
Export Docker image:
```bash
docker save short_gpt_docker > short_gpt_docker.tar
```
================================================
FILE: README.md
================================================
# 🚀🎬 ShortGPT
## AI video automation framework
⚡ Automating video and short content creation with AI ⚡
Follow the installation steps below for running the web app locally (running the Google Colab is highly recommended).
Please read "installation-notes.md" for more details.
## 🎥 Showcase ([Full video on YouTube](https://youtu.be/hpoSHq-ER8U))
https://github.com/RayVentura/ShortGPT/assets/121462835/a802faad-0fd7-4fcb-aa82-6365c27ea5fe
## 🎥 Voice Dubbing
https://github.com/RayVentura/ShortGPT/assets/121462835/06f51b2d-f8b1-4a23-b299-55e0e18902ef
## 🌟 Show Your Support
We hope you find ShortGPT helpful! If you do, let us know by giving us a star ⭐ on the repo. It's easy, just click on the 'Star' button at the top right of the page. Your support means a lot to us and keeps us motivated to improve and expand ShortGPT. Thank you and happy content creating! 🎉
[](https://github.com/RayVentura/ShortGPT/stargazers)
## 🛠️ How it works

## 📝 Introduction to ShortGPT
ShortGPT is a powerful framework for automating content creation. It simplifies video creation, footage sourcing, voiceover synthesis, and editing tasks. Among the most popular use cases of ShortGPT are YouTube automation and TikTok Creativity Program automation.
- 🎞️ **Automated editing framework**: Streamlines the video creation process with an LLM oriented video editing language.
- 📃 **Scripts and Prompts**: Provides ready-to-use scripts and prompts for various LLM automated editing processes.
- 🗣️ **Voiceover / Content Creation**: Supports multiple languages including English 🇺🇸, Spanish 🇪🇸, Arabic 🇦🇪, French 🇫🇷, Polish 🇵🇱, German 🇩🇪, Italian 🇮🇹, Portuguese 🇵🇹, Russian 🇷🇺, Mandarin Chinese 🇨🇳, Japanese 🇯🇵, Hindi 🇮🇳, Korean 🇰🇷, and well over 30 more languages (with EdgeTTS)
- 🔗 **Caption Generation**: Automates the generation of video captions.
- 🌐🎥 **Asset Sourcing**: Sources images and video footage from the internet, connecting with the web and Pexels API as necessary.
- 🧠 **Memory and persistency**: Ensures long-term persistency of automated editing variables with TinyDB.
## 🚀 Quick Start: Run ShortGPT on Google Colab (https://colab.research.google.com/drive/1_2UKdpF6lqxCqWaAcZb3rwMVQqtbisdE?usp=sharing)
If you prefer not to install the prerequisites on your local system, you can use the Google Colab notebook. This option is free and requires no installation setup.
1. Click on the link to the Google Colab notebook: [https://colab.research.google.com/drive/1_2UKdpF6lqxCqWaAcZb3rwMVQqtbisdE?usp=sharing](https://colab.research.google.com/drive/1_2UKdpF6lqxCqWaAcZb3rwMVQqtbisdE?usp=sharing)
2. Once you're in the notebook, simply run the cells in order from top to bottom. You can do this by clicking on each cell and pressing the 'Play' button, or by using the keyboard shortcut Shift + Enter. Enjoy using ShortGPT!
# Instructions for running shortGPT locally
This guide provides step-by-step instructions for installing shortGPT and its dependencies.
To run ShortGPT locally, you need Docker.
## Installation Steps
To run ShortGPT, you need to have Docker installed. Follow the instructions in "installation-notes.md" for more details.
1. For running the Dockerfile, do this:
```bash
docker build -t short_gpt_docker:latest .
docker run -p 31415:31415 --env-file .env short_gpt_docker:latest
```
## Running runShortGPT.py Web Interface
2. After running the script, a Gradio interface should open at your local host on port 31415 (http://localhost:31415)
## Framework overview
- 🎬 The `ContentShortEngine` is designed for creating shorts, handling tasks from script generation to final rendering, including adding YouTube metadata.
- 🎥 The `ContentVideoEngine` is ideal for longer videos, taking care of tasks like generating audio, automatically sourcing background video footage, timing captions, and preparing background assets.
- 🗣️ The `ContentTranslationEngine` is designed to dub and translate entire videos, from mainstream languages to more specific target languages. It takes a video file or YouTube link, transcribes its audio, translates the content, voices it in a target language, adds captions, and gives back a new video in a totally different language.
- 🎞️ The automated `EditingEngine`, using Editing Markup Language and JSON, breaks down the editing process into manageable and customizable blocks, comprehensible to Large Language Models.
💡 ShortGPT offers customization options to suit your needs, from language selection to watermark addition.
🔧 As a framework, ShortGPT is adaptable and flexible, offering the potential for efficient, creative content creation.
More documentation incoming, please be patient.
## Technologies Used
ShortGPT utilizes the following technologies to power its functionality:
- **Moviepy**: Moviepy is used for video editing, allowing ShortGPT to edit and render videos.
- **Openai**: Openai is used for automating the entire process, including generating scripts and prompts for LLM automated editing processes.
- **ElevenLabs**: ElevenLabs is used for voice synthesis, supporting multiple languages for voiceover creation.
- **EdgeTTS**: Microsoft's FREE EdgeTTS is used for voice synthesis, supporting many more languages than ElevenLabs currently does.
- **Pexels**: Pexels is used for sourcing background footage, allowing ShortGPT to connect with the web and access a wide range of images and videos.
- **Bing Image**: Bing Image is used for sourcing images, providing a comprehensive database for ShortGPT to retrieve relevant visuals.
These technologies work together to provide a seamless and efficient experience in automating video and short content creation with AI.
## 💁 Contributing
As an open-source project in a rapidly developing field, we are extremely open to contributions, whether it would be in the form of a new feature, improved infrastructure, or better documentation.
================================================
FILE: docs/.gitignore
================================================
# Dependencies
/node_modules
# Production
/build
# Generated files
.docusaurus
.cache-loader
# Misc
.DS_Store
.env.local
.env.development.local
.env.test.local
.env.production.local
npm-debug.log*
yarn-debug.log*
yarn-error.log*
================================================
FILE: docs/README.md
================================================
# ShortGPT Documentation
# Installation
1. `yarn install` in the root of this repository (two levels above this directory).
1. In this directory, do `yarn start`.
1. A browser window will open up, pointing to the docs.
# Deployment
Vercel handles the deployment of this website.
================================================
FILE: docs/babel.config.js
================================================
module.exports = {
presets: [require.resolve('@docusaurus/core/lib/babel/preset')],
};
================================================
FILE: docs/docs/api-key-manager.mdx
================================================
---
title: ApiKeyManager in ShortGPT
sidebar_label: ApiKeyManager
---
# ApiKeyManager in ShortGPT
ApiKeyManager is a class in the ShortGPT framework that manages the API keys for different providers. It interacts with the database to get and set API keys.
## Importing ApiKeyManager
```python
from shortGPT.config.api_db import ApiKeyManager, ApiProvider
```
## Using ApiKeyManager
ApiKeyManager provides two main methods: `get_api_key` and `set_api_key`.
### set_api_key
This method is used to set the API key for a specific provider in the database. It takes two arguments: the key (provider name) and the value (API key).
```python
ApiKeyManager.set_api_key(ApiProvider.OPENAI, "your_openai_key")
ApiKeyManager.set_api_key(ApiProvider.ELEVEN_LABS, "your_eleven_labs_key")
```
In the above example, we are setting the API keys for OPENAI and ELEVEN_LABS.
### get_api_key
This method is used to retrieve the API key for a specific provider from the database. It takes one argument: the key (provider name).
```python
openai_key = ApiKeyManager.get_api_key(ApiProvider.OPENAI)
eleven_labs_key = ApiKeyManager.get_api_key(ApiProvider.ELEVEN_LABS)
```
In the above example, we are retrieving the API keys for OPENAI and ELEVEN_LABS.
## Note
The `key` argument in both methods can either be a string or an instance of the `ApiProvider` enum. If it is an instance of `ApiProvider`, the `value` attribute of the enum instance will be used as the key.
```python
ApiKeyManager.set_api_key("OPENAI_API_KEY", "your_openai_key")
ApiKeyManager.set_api_key("ELEVENLABS_API_KEY", "your_eleven_labs_key")
openai_key = ApiKeyManager.get_api_key("OPENAI_API_KEY")
eleven_labs_key = ApiKeyManager.get_api_key("ELEVENLABS_API_KEY")
```
In the above example, we are setting and retrieving the API keys using string keys instead of `ApiProvider` instances.
================================================
FILE: docs/docs/asset-database.mdx
================================================
---
title: AssetDatabase in ShortGPT
sidebar_label: AssetDatabase
---
# AssetDatabase in ShortGPT
The `AssetDatabase` in ShortGPT is a powerful tool that allows you to manage both local and remote assets. This guide will provide you with examples of how to use the `AssetDatabase`.
## Importing AssetDatabase and AssetType
```python
from shortGPT.config.asset_db import AssetDatabase, AssetType
```
## Adding Assets
You can add both remote and local assets to the `AssetDatabase`.
### Adding Remote Assets
```python
AssetDatabase.add_remote_asset("minecraft background cube", AssetType.BACKGROUND_VIDEO, "https://www.youtube.com/watch?v=Pt5_GSKIWQM")
AssetDatabase.add_remote_asset('chill music', AssetType.BACKGROUND_MUSIC, "https://www.youtube.com/watch?v=uUu1NcSHg2E")
```
### Adding Local Assets
```python
AssetDatabase.add_local_asset('my_music', AssetType.AUDIO, "./my_music.wav")
```
## Asset Types
The `AssetType` enum is used to specify the type of asset being added to the `AssetDatabase`. The available asset types are:
- VIDEO
- AUDIO
- IMAGE
- BACKGROUND_MUSIC
- BACKGROUND_VIDEO
- OTHER
## Getting Asset Information
You can retrieve information about an asset using the following methods:
### Get Asset Duration
This method returns the duration in seconds of a video or audio asset. If the asset is neither video nor audio, it returns `None`.
```python
AssetDatabase.get_asset_duration('minecraft background cube')
```
### Get Asset Link
This method returns a source URL, or the path of the resource. If the asset is a YouTube video or audio, it uses `yt-dlp` to extract a download URL or a direct video/audio link.
```python
AssetDatabase.get_asset_link('minecraft background cube')
```
## Synchronizing Local Assets
The `sync_local_assets` method synchronizes the database with local assets found in the `/public` folder. If it doesn't find one, it doesn't do anything.
```python
AssetDatabase.sync_local_assets()
```
## Removing Assets
You can remove an asset from the database by providing its name to the `remove_asset` method.
```python
AssetDatabase.remove_asset('name')
```
## Getting Database State
You can get the state of the asset database as a pandas dataframe using the `get_df` method.
```python
AssetDatabase.get_df()
```
This method returns a dataframe that includes the name, type, link, source, and timestamp of each asset in the database.
================================================
FILE: docs/docs/content-translation-engine.mdx
================================================
---
title: ContentTranslationEngine
sidebar_label: ContentTranslationEngine
---
The `ContentTranslationEngine` in ShortGPT is a powerful tool that automates the process of translating video content. This guide will provide you with an overview of how to use the `ContentTranslationEngine`.
## Importing ContentTranslationEngine
```python
from shortGPT.engine.content_translation_engine import ContentTranslationEngine
```
## Initializing ContentTranslationEngine
The `ContentTranslationEngine` requires a `VoiceModule`, a source URL (either a local video file path or a YouTube link), a target language, and an optional flag indicating whether to use captions for translation.
```python
content_engine = ContentTranslationEngine(voice_module, src_url, target_language, use_captions=False)
```
## Example
```python
from shortGPT.config.api_db import ApiKeyManager, ApiProvider
from shortGPT.engine.content_translation_engine import ContentTranslationEngine
from shortGPT.config.languages import Language
from shortGPT.audio.edge_voice_module import EdgeTTSVoiceModule, EDGE_TTS_VOICENAME_MAPPING
# Set API Keys
ApiKeyManager.set_api_key(ApiProvider.OPENAI, "your_openai_key")
ApiKeyManager.set_api_key(ApiProvider.ELEVEN_LABS, "your_eleven_labs_key")
# Configure the Voice Module
voice_name = EDGE_TTS_VOICENAME_MAPPING[Language.SPANISH]['male']
voice_module = EdgeTTSVoiceModule(voice_name)
# Configure Content Engine
src_url = "https://www.youtube.com/watch?v=QQz5hj8y1TE"
target_language = Language.SPANISH
use_captions = False
content_engine = ContentTranslationEngine(voice_module, src_url, target_language, use_captions)
# Generate Content
for step_num, step_logs in content_engine.makeContent():
print(f" {step_logs}")
# Get Video Output Path
print(content_engine.get_video_output_path())
```
## How ContentTranslationEngine Works
The `ContentTranslationEngine` works by executing a series of steps defined in the `stepDict` dictionary. Each step is a method that performs a specific task in the video translation process. Here's what each step does:
1. `_transcribe_audio`: Transcribes the audio from the source video
2. `_translate_content`: Translates the transcribed content from the source language to the target language.
3. `_generate_translated_audio`: Generates translated audio using the translated content and the specified `VoiceModule`.
4. `_edit_and_render_video`: Edits and renders the translated video.
5. `_add_metadata`: Adds metadata to the translated video.
## Providing a Source URL
The `ContentTranslationEngine` requires a source URL, which can be either a local video file path or a link to a YouTube video or YouTube Short. The engine uses this source URL to retrieve the audio and video content for translation.
## Using Captions for Translation
Set the `use_captions` flag to `True` to see text captions on the video generated that are timed with the audio voice.
================================================
FILE: docs/docs/content-video-engine.mdx
================================================
---
title: ContentVideoEngine
sidebar_label: ContentVideoEngine
---
The `ContentVideoEngine` in ShortGPT is a powerful tool that encapsulates all the automation required to create a video. This guide will provide you with an overview of how to use the `ContentVideoEngine`.
## Importing ContentVideoEngine
```python
from shortGPT.engine.content_video_engine import ContentVideoEngine
```
## Initializing ContentVideoEngine
The `ContentVideoEngine` requires a `VoiceModule`, a script, and optionally a background music name, a watermark (string with the name of your channel / brand), a flag indicating whether the video you want is in vertical format, and a language.
```python
content_engine = ContentVideoEngine(voice_module, script, background_music_name="", watermark=None, isVerticalFormat=False, language=Language.ENGLISH)
```
## Example
```python
from shortGPT.config.api_db import ApiKeyManager, ApiProvider
from shortGPT.config.asset_db import AssetDatabase, AssetType
from shortGPT.engine.content_video_engine import ContentVideoEngine
from shortGPT.config.languages import Language
from shortGPT.audio.edge_voice_module import EdgeTTSVoiceModule, EDGE_TTS_VOICENAME_MAPPING
# Set API Keys
ApiKeyManager.set_api_key(ApiProvider.OPENAI, "your_openai_key")
ApiKeyManager.set_api_key(ApiProvider.PEXELS, "your_pexels_key")
# Add Assets
AssetDatabase.add_remote_asset('chill music', AssetType.BACKGROUND_MUSIC, "https://www.youtube.com/watch?v=uUu1NcSHg2E")
# Configure the Voice Module
voice_name = EDGE_TTS_VOICENAME_MAPPING[Language.SPANISH]['male']
voice_module = EdgeTTSVoiceModule(voice_name)
# Prepare the script
script = "La inteligencia artificial (IA) está revolucionando nuestro mundo de manera sorprendente. Los robots y asistentes virtuales nos ayudan en nuestras tareas diarias y simplifican nuestra vida. En la medicina, la IA permite diagnósticos más precisos y avances en tratamientos. En la industria automotriz, los vehículos autónomos están cambiando la forma en que nos desplazamos. Sin embargo, surgen interrogantes sobre el impacto en el empleo y la ética de su uso. A pesar de los desafíos, la IA promete un futuro emocionante y lleno de posibilidades. ¿Estamos preparados para abrazar este avance tecnológico?"
# Configure Content Engine
content_engine = ContentVideoEngine(voice_module, script, background_music_name='chill music', language=Language.SPANISH)
# Generate Content
for step_num, step_logs in content_engine.makeContent():
print(f" {step_logs}")
# Get Video Output Path
print(content_engine.get_video_output_path())
```
In this example, we first set the API keys for OpenAI and Pexels. We then add a remote asset for background music. We configure the voice module to use EdgeTTS for voice synthesis. We prepare a script for the video and then configure the `ContentVideoEngine` with the voice module, script, and background music. We then generate the content and print the output path of the video.
## How ContentVideoEngine Works
The `ContentVideoEngine` works by executing a series of steps defined in the `stepDict` dictionary. Each step is a method that performs a specific task in the video creation process. Here's what each step does:
1. `_generateTempAudio`: Generates a temporary audio file from the provided script using the specified `VoiceModule`.
2. `_speedUpAudio`: Speeds up the generated audio file to match the pace of a typical video.
3. `_timeCaptions`: Generates timed captions for the video based on the script.
4. `_generateVideoSearchTerms`: Generates search terms to find relevant videos on Pexels based on the script.
5. `_generateVideoUrls`: Retrieves video URLs from Pexels using the generated search terms.
6. `_chooseBackgroundMusic`: Chooses background music for the video.
7. `_prepareBackgroundAssets`: Prepares the background assets for the video.
8. `_prepareCustomAssets`: Prepares any custom assets for the video.
9. `_editAndRenderShort`: Edits and renders the video.
10. `_addMetadata`: Adds metadata to the video.
## Using Pexels API
The `ContentVideoEngine` sources video assets from the Pexels API. To use it, you need to provide your Pexels API key. The engine uses this key to retrieve relevant videos based on the search terms generated from the script.
## Providing a Script
The `ContentVideoEngine` requires a script to generate the video. The script is used to generate the audio, captions, and search terms for sourcing videos from Pexels. The script should be a string containing the narration for the video.
================================================
FILE: docs/docs/facts-short-engine.mdx
================================================
---
title: FactsShortEngine
sidebar_label: FactsShortEngine
---
The `FactsShortEngine` in ShortGPT is a content engine specifically designed for generating short videos that present interesting facts. This guide will provide you with an overview of how to use the `FactsShortEngine`.
## Importing FactsShortEngine
```python
from shortGPT.engine.facts_short_engine import FactsShortEngine
```
## Initializing FactsShortEngine
The `FactsShortEngine` requires a `VoiceModule`, the type of facts you want to generate, a background video name, a background music name, the number of images to include in the video, a watermark (string with the name of your channel / brand), and a language.
```python
content_engine = FactsShortEngine(voice_module, facts_type, background_video_name, background_music_name, num_images=None, watermark=None, language=Language.ENGLISH)
```
## Example
```python
from shortGPT.config.api_db import ApiKeyManager, ApiProvider
from shortGPT.config.asset_db import AssetDatabase, AssetType
from shortGPT.engine.facts_short_engine import FactsShortEngine
from shortGPT.config.languages import Language
from shortGPT.audio.edge_voice_module import EdgeTTSVoiceModule, EDGE_TTS_VOICENAME_MAPPING
# Set API Keys
ApiKeyManager.set_api_key(ApiProvider.OPENAI, "your_openai_key")
# Add Assets
AssetDatabase.add_remote_asset("minecraft background cube", AssetType.BACKGROUND_VIDEO, "https://www.youtube.com/watch?v=Pt5_GSKIWQM")
AssetDatabase.add_remote_asset('chill music', AssetType.BACKGROUND_MUSIC, "https://www.youtube.com/watch?v=uUu1NcSHg2E")
# Configure the Voice Module
voice_name = EDGE_TTS_VOICENAME_MAPPING[Language.GERMAN]['male']
voice_module = EdgeTTSVoiceModule(voice_name)
# Configure Content Engine
facts_video_topic = "Interesting scientific facts from the 19th century"
content_engine = FactsShortEngine(voiceModule=voice_module,
facts_type=facts_video_topic,
background_video_name="minecraft background cube", # <--- use the same name you saved in the AssetDatabase
background_music_name='chill music', # <--- use the same name you saved in the AssetDatabase
num_images=5, # If you don't want images in your video, put 0 or None
language=Language.GERMAN)
# Generate Content
for step_num, step_logs in content_engine.makeContent():
print(f" {step_logs}")
# Get Video Output Path
print(content_engine.get_video_output_path())
```
In this example, we first set the API keys for OpenAI. We then add remote assets for the background video and background music. We configure the voice module to use EdgeTTS for voice synthesis. We configure the `FactsShortEngine` with the voice module, facts type, background video name, background music name, number of images, and language. We then generate the content and print the output path of the video.
## How FactsShortEngine Works
The `FactsShortEngine` works by executing a series of steps defined in the `stepDict` dictionary. Each step is a method that performs a specific task in the video creation process. Here's what each step does:
1. `_generateScript`: Generates the script for the facts short using the provided `facts_type`.
2. `_generateTempAudio`: Generates a temporary audio file from the generated script using the specified `VoiceModule`.
3. `_speedUpAudio`: Speeds up the generated audio file to match the pace of a typical video.
4. `_timeCaptions`: Generates timed captions for the video based on the script.
5. `_generateImageSearchTerms`: Generates search terms to find relevant images using the Bing search engine based on the script.
6. `_generateImageUrls`: Retrieves image URLs from Bing using the generated search terms.
7. `_chooseBackgroundMusic`: Chooses background music for the video.
8. `_chooseBackgroundVideo`: Chooses a background video for the video.
9. `_prepareBackgroundAssets`: Prepares the background assets for the video.
10. `_prepareCustomAssets`: Prepares any custom assets for the video.
11. `_editAndRenderShort`: Edits and renders the video.
12. `_addYoutubeMetadata`: Adds metadata to the video.
## Providing a Facts Type
The `FactsShortEngine` requires a facts type to generate the script. The facts type should be a string indicating the specific category or topic of facts you want to include in the video.
That's it! You have now successfully generated a facts short video using the FactsShortEngine in the ShortGPT framework.
================================================
FILE: docs/docs/getting-started.mdx
================================================
---
title: ShortGPT Hello World Example
sidebar_label: ShortGPT Hello World Example
---
# ShortGPT Hello World Example
This guide provides a basic example of how to use the shortGPT framework. ShortGPT encapsulates the entire process of content automation into `content engines`. In this example, we'll show you how to instantiate the FactsShortEngine, which will automate the production of the "Interesting Facts" niche of Shorts.
## Prerequisites
Before you start, make sure you have [followed the installation steps](./how-to-install) and have your API keys ready.
## Code
```python
from shortGPT.config.api_db import ApiKeyManager, ApiProvider
from shortGPT.config.asset_db import AssetDatabase, AssetType
from shortGPT.engine.facts_short_engine import FactsShortEngine
from shortGPT.audio.eleven_voice_module import ElevenLabsVoiceModule
from shortGPT.config.languages import Language
from shortGPT.audio.edge_voice_module import EdgeTTSVoiceModule, EDGE_TTS_VOICENAME_MAPPING
# Set API Keys
ApiKeyManager.set_api_key(ApiProvider.OPENAI, "your_openai_key")
ApiKeyManager.set_api_key(ApiProvider.ELEVEN_LABS, "your_eleven_labs_key")
# Add Assets
AssetDatabase.add_remote_asset("minecraft background cube", AssetType.BACKGROUND_VIDEO, "https://www.youtube.com/watch?v=Pt5_GSKIWQM")
AssetDatabase.add_remote_asset('chill music', AssetType.BACKGROUND_MUSIC, "https://www.youtube.com/watch?v=uUu1NcSHg2E")
AssetDatabase.add_local_asset('my_music', AssetType.AUDIO, "./my_music.wav")
USE_ELEVEN_LABS = False
# Configure the ElevenLabs Voice Module
if USE_ELEVEN_LABS:
eleven_labs_key = ApiKeyManager.get_api_key(ApiProvider.ELEVEN_LABS)
voice_module = ElevenLabsVoiceModule(api_key = eleven_labs_key, voiceName="Chris")
else:
## You can also use the EdgeTTS for Free voice synthesis
voice_name = EDGE_TTS_VOICENAME_MAPPING[Language.GERMAN]['male']
voice_module = EdgeTTSVoiceModule(voice_name)
# Configure Content Engine
facts_video_topic = "Interesting scientific facts from the 19th century"
content_engine = FactsShortEngine(voiceModule=voice_module,
facts_type=facts_video_topic,
background_video_name="minecraft background cube", # <--- use the same name you saved in the AssetDatabase
background_music_name='chill music', # <--- use the same name you saved in the AssetDatabase
num_images=5, # If you don't want images in your video, put 0 or None
language=Language.GERMAN)
# Generate Content
for step_num, step_logs in content_engine.makeContent():
print(f" {step_logs}")
# Get Video Output Path
print(content_engine.get_video_output_path())
```
That's it! You have now successfully generated your first content using the shortGPT framework.
================================================
FILE: docs/docs/how-to-install.mdx
================================================
---
title: Step-by-Step Guide to Installing ShortGPT
sidebar_label: Installation Guide
---
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# Launching Your ShortGPT Experience
This guide will walk you through the process of setting up your machine to run the **ShortGPT** library. The setup requires one component: FFmpeg. Follow the steps below to get this dependency installed.
## Before You Begin
Make sure you have the following installed on your machine:
- Python 3.x
- Pip (Python package installer)
## Installation Process
Here are the steps to install FFmpeg, and the ShortGPT library.
After downloading, follow the installation instructions provided on the website.
### Step 1: Install FFmpeg (Essential for ShortGPT)
FFmpeg is a key component for ShortGPT. Download the FFmpeg binaries from the link below:
> **[👉 Download FFmpeg Here (click on FFmpeg_Full.msi) 👈](https://github.com/icedterminal/ffmpeg-installer/releases/tag/6.0.0.20230306)**
The download will include ffmpeg and ffprobe and will add it to your path. Follow the installation instructions as guided.
### Step 2: Install ShortGPT Library
- Open a terminal or command prompt.
- Execute the following command:
```bash
pip install --upgrade shortgpt
```
### Step 1: Install FFmpeg (Essential for ShortGPT)
Run the command below in your command line:
```bash
brew install ffmpeg
```
### Step 2: Install ShortGPT Library
- Open a terminal or command prompt.
- Execute the following command:
```bash
pip install --upgrade shortgpt
```
### Step 1: Install FFmpeg
Execute the following command:
```bash
sudo apt-get install ffmpeg
```
### Step 2: Install ShortGPT Library
- Open a terminal or command prompt.
- Execute the following command:
```bash
pip install --upgrade shortgpt
```
And there you have it! Your machine is now ready to run ShortGPT. Dive into the world of automated video content creation with ShortGPT!
================================================
FILE: docs/docusaurus.config.js
================================================
/* eslint-disable @typescript-eslint/no-var-requires */
// Docusaurus site configuration for the ShortGPT documentation website.
// Prism syntax-highlighting themes: one for dark mode, one for light mode.
const darkCodeTheme = require('prism-react-renderer/themes/dracula');
const lightCodeTheme = require('prism-react-renderer/themes/github');
// With JSDoc @type annotations, IDEs can provide config autocompletion
/** @type {import('@docusaurus/types').DocusaurusConfig} */
(
module.exports = {
// Site identity and the URL it is served from.
title: 'ShortGPT',
tagline:
'Open-Source Framework for AI content automation',
url: 'https://dev.shortgpt.ai',
baseUrl: '/',
favicon: 'img/favicon.ico',
// GitHub organization / repository this site belongs to.
organizationName: 'RayVentura',
projectName: 'ShortGPT',
// Treat broken links (including broken Markdown links) as build errors.
onBrokenLinks: 'throw',
onBrokenMarkdownLinks: 'throw',
presets: [
[
'@docusaurus/preset-classic',
/** @type {import('@docusaurus/preset-classic').Options} */
({
docs: {
// Docs content directory and sidebar definition.
path: 'docs',
sidebarPath: 'sidebars.js',
// Base URL for "edit this page" links; points at the `stable` branch.
editUrl:
'https://github.com/RayVentura/ShortGPT/edit/stable/docs/',
// Only the current docs are configured; they are labelled 'current'
// and also marked as the latest version.
versions: {
current: {
label: 'current',
},
},
lastVersion: 'current',
showLastUpdateAuthor: true,
showLastUpdateTime: true,
},
theme: {
customCss: require.resolve('./src/css/custom.css'),
},
}),
],
],
// Plugin referenced by name only.
plugins: ['tailwind-loader'],
themeConfig:
/** @type {import('@docusaurus/preset-classic').ThemeConfig} */
({
navbar: {
hideOnScroll: true,
logo: {
alt: 'ShortGPT',
src: 'img/logo.png',
},
items: [
// Docs entry point (note: every navbar item, including this one, is positioned on the right)
{
label: 'Docs',
to: 'docs/how-to-install',
position: 'right',
},
// right
{
type: 'docsVersionDropdown',
position: 'right',
},
// Link to the GitHub repository; it has no label and is styled via the
// 'header-github-link' CSS class.
{
href: 'https://github.com/RayVentura/ShortGPT',
position: 'right',
className: 'header-github-link',
},
],
},
// Default to light mode, but follow the visitor's OS preference and keep
// the manual light/dark switch available.
colorMode: {
defaultMode: 'light',
disableSwitch: false,
respectPrefersColorScheme: true,
},
// Banner shown at the top of the site.
announcementBar: {
content:
'⭐️ If you like ShortGPT, give it a star on GitHub! ⭐️',
},
// Footer: four link columns followed by a copyright line.
footer: {
links: [
{
title: 'Docs',
items: [
{
label: 'Getting Started',
to: 'docs/how-to-install',
},
],
},
{
title: 'ShortGPT',
items: [
{
label: 'Issues',
to: 'https://github.com/RayVentura/ShortGPT/issues',
},
],
},
{
title: 'Community',
items: [
{
label: 'Discord',
to: 'https://discord.com/invite/bRTacwYrfX',
},
],
},
{
title: 'Social',
items: [
{
label: 'GitHub',
to: 'https://github.com/RayVentura/ShortGPT',
},
{
label: 'Twitter',
to: 'https://twitter.com/RayVenturaHQ',
},
],
},
],
// The year is computed when this config file is evaluated.
copyright: `ShortGPT ${new Date().getFullYear()}`,
},
// Code-block highlighting themes for light and dark mode.
prism: {
theme: lightCodeTheme,
darkTheme: darkCodeTheme,
},
}),
}
);
================================================
FILE: docs/package.json
================================================
{
"name": "shortgpt-documentation",
"version": "3.5.1",
"private": true,
"scripts": {
"build:clean": "rm -rf dist build .docusaurus node_modules",
"docusaurus": "docusaurus",
"start": "docusaurus start",
"build": "docusaurus build",
"swizzle": "docusaurus swizzle",
"deploy": "docusaurus deploy",
"clear": "docusaurus clear",
"serve": "docusaurus serve",
"write-translations": "docusaurus write-translations",
"write-heading-ids": "docusaurus write-heading-ids"
},
"dependencies": {
"@algolia/ui-library": "9.10.2",
"@docsearch/react": "3.5.1",
"@docusaurus/core": "2.4.1",
"@docusaurus/preset-classic": "2.4.1",
"@mdx-js/react": "^1.6.22",
"clsx": "^1.1.1",
"file-loader": "6.2.0",
"my-loaders": "file:plugins/my-loaders",
"postcss": "8.4.25",
"postcss-import": "15.0.0",
"postcss-preset-env": "7.8.2",
"prism-react-renderer": "1.2.1",
"react": "^18.2.0",
"react-dom": "^18.2.0",
"tailwind-loader": "file:plugins/tailwind-loader",
"url-loader": "4.1.1"
},
"devDependencies": {
"postcss-loader": "6.2.1",
"tailwindcss": "npm:@tailwindcss/postcss7-compat"
},
"browserslist": {
"production": [
">0.5%",
"not dead",
"not op_mini all"
],
"development": [
"last 1 chrome version",
"last 1 firefox version",
"last 1 safari version"
]
}
}
================================================
FILE: docs/plugins/my-loaders/index.js
================================================
module.exports = function () {
return {
name: 'loaders',
configureWebpack() {
return {
module: {
rules: [
{
test: /\.(gif|png|jpe?g|svg)$/i,
exclude: /\.(mdx?)$/i,
use: ['file-loader', { loader: 'image-webpack-loader' }],
},
],
},
};
},
};
};
================================================
FILE: docs/plugins/tailwind-loader/index.js
================================================
/* eslint-disable @typescript-eslint/no-var-requires */
module.exports = function () {
return {
name: 'postcss-tailwindcss-loader',
configurePostCss(postcssOptions) {
postcssOptions.plugins.push(
require('postcss-import'),
require('tailwindcss'),
require('postcss-preset-env')({
autoprefixer: {
flexbox: 'no-2009',
},
stage: 4,
})
);
return postcssOptions;
},
};
};
================================================
FILE: docs/sidebars.js
================================================
/**
 * Creating a sidebar enables you to:
 * - create an ordered group of docs
 * - render a sidebar for each doc of that group
 * - provide next/previous navigation.
 *
 * The sidebars can be generated from the filesystem, or explicitly defined here.
 *
 * Create as many sidebars as you want.
 */
// A single sidebar named `docs` with three categories, all declared with `collapsed: false`.
// The strings in each `items` array match the .mdx file names under docs/docs/.
module.exports = {
docs: [
// Installation and first steps.
{
type: 'category',
label: 'Introduction',
collapsed: false,
items: ['how-to-install', 'getting-started'],
},
// One page per content engine.
{
type: 'category',
label: 'Content Engines',
collapsed: false,
items: ['content-video-engine', 'content-translation-engine', 'facts-short-engine'],
},
// API key manager and asset database pages.
{
type: 'category',
label: 'API Key and Asset',
collapsed: false,
items: ['api-key-manager', 'asset-database'],
},
],
};
================================================
FILE: docs/src/components/Home.js
================================================
import { Hero } from '@algolia/ui-library';
import { useColorMode } from '@docusaurus/theme-common';
import { useBaseUrlUtils } from '@docusaurus/useBaseUrl';
import React from 'react';
import { Link } from 'react-router-dom';
function Home() {
const { withBaseUrl } = useBaseUrlUtils();
const { colorMode } = useColorMode();
React.useEffect(() => {
if (colorMode === 'dark') {
document.querySelector('html').classList.add('dark');
} else {
document.querySelector('html').classList.remove('dark');
}
}, [colorMode]);
function Header() {
return (
🚀🎬 SHORTGPT
Opensource AI Content Automation Framework
>
}
background="cubes"
cta={[
Get started
]}
/>
);
}
function Description() {
return (
<>
{/* Description */}
Automating video and short content creation with AI
ShortGPT is a powerful framework for automating content creation. It simplifies video creation, footage sourcing, voiceover synthesis, and editing tasks.
Automated editing framework
ShortGPT streamlines the video creation process with an LLM oriented video editing language, making it easier to automate editing tasks.
Voiceover / Content Creation
ShortGPT supports multiple languages for voiceover synthesis, making it easy to create content in various languages.
Asset Sourcing
ShortGPT can source images and video footage from the internet, allowing you to easily find and use relevant visuals.
{/* How it works */}
How it works
ShortGPT is an AI-powered framework that automates the process of content creation, from script generation to asset sourcing and video editing.
Automated Editing Framework
ShortGPT makes heavy use of LLMs and automated video editing libraries to streamline the video creation process (FFmpeg, moviepy, ffprobe).
Voiceover / Content Creation
ShortGPT integrates multiple neural voice synthesis engines (ElevenLabs, EdgeTTS), to allow human-like voice quality in the audio generated.
Asset Sourcing
ShortGPT is equipped with an advanced asset sourcing module that can retrieve images and video footage from the internet. This feature allows for the easy incorporation of relevant visuals into the content (Pexels, YouTube, and more soon).
{/* Powered by AI */}
Powered by AI
Automated Editing
ShortGPT automates the video editing process, making it faster and more efficient with the help of AI.
Voiceover / Content Creation
ShortGPT supports multiple languages for voiceover synthesis, making it easy to create content in various languages.
Asset Sourcing
ShortGPT can source images and video footage from the internet, allowing you to easily find and use relevant visuals.
'''
@staticmethod
def get_html_video_template(file_url_path, file_name, width="auto", height="auto"):
"""
Generate an HTML code snippet for embedding and downloading a video.
Parameters:
file_url_path (str): The URL or path to the video file.
file_name (str): The name of the video file.
width (str, optional): The width of the video. Defaults to "auto".
height (str, optional): The height of the video. Defaults to "auto".
Returns:
str: The generated HTML code snippet.
"""
# NOTE(review): as visible here, the template literal below contains only a
# newline and interpolates none of the parameters -- confirm the markup has
# not been lost before relying on this method's output.
html = f'''
'''
return html
================================================
FILE: gui/ui_tab_asset_library.py
================================================
import os
import re
import shutil
import gradio as gr
from gui.asset_components import AssetComponentsUtils
from gui.ui_abstract_component import AbstractComponentUI
from shortGPT.config.asset_db import AssetDatabase, AssetType
class AssetLibrary(AbstractComponentUI):
    '''Gradio "Asset library" tab.

    Lets the user register remote (YouTube) or local (uploaded) assets in
    ``AssetDatabase``, preview them and delete them. After every change the
    dataframe, the preview pane, the delete button label and the shared
    background video / music choice components are refreshed.
    '''

    # Label prefix of the delete button; the selected asset name follows it.
    __DELETE_PREFIX = "🗑️ Delete "

    def __init__(self):
        pass

    def create_ui(self):
        '''Create the asset library UI and wire its event handlers.

        Returns:
            The ``gr.Tab`` that contains the whole asset library.
        '''
        with gr.Tab("Asset library") as asset_library_ui:
            with gr.Column():
                with gr.Accordion("➕ Add your own local assets or from Youtube", open=False) as accordion:
                    remote = "Add youtube video / audio"
                    local = "Add local video / audio / image "
                    assetFlows = gr.Radio([remote, local], label="", value=remote)
                    with gr.Column(visible=True) as youtubeFlow:
                        asset_name = gr.Textbox(label="Name (required)")
                        asset_type = gr.Radio([AssetType.BACKGROUND_VIDEO.value, AssetType.BACKGROUND_MUSIC.value,], value=AssetType.BACKGROUND_VIDEO.value, label="Type")
                        youtube_url = gr.Textbox(label="URL (https://youtube.com/xyz)")
                        add_youtube_link = gr.Button("ADD")
                    with gr.Column(visible=False) as localFileFlow:
                        local_upload_name = gr.Textbox(label="Name (required)")
                        upload_type = gr.Radio([AssetType.BACKGROUND_VIDEO.value, AssetType.BACKGROUND_MUSIC.value, AssetType.IMAGE.value], value="background video", interactive=True, label="Type")
                        video_upload = gr.Video(visible=True, sources="upload", interactive=True)
                        audio_upload = gr.Audio(visible=False, sources="upload", type="filepath", interactive=True)
                        image_upload = gr.Image(visible=False, sources="upload", type="filepath", interactive=True)
                        upload_button = gr.Button("ADD")
                        # Show only the upload widget that matches the selected asset type.
                        upload_type.change(lambda x: (gr.update(visible='video' in x),
                                                      gr.update(visible=any(kind in x for kind in ['audio', 'music'])),
                                                      gr.update(visible=x == 'image')),
                                           [upload_type], [video_upload, audio_upload, image_upload])
                    assetFlows.change(lambda x: (gr.update(visible=x == remote), gr.update(visible=x == local)), [assetFlows], [youtubeFlow, localFileFlow])
                with gr.Row():
                    with gr.Column(scale=3):
                        asset_dataframe_ui = gr.Dataframe(self.__fulfill_df, interactive=False)
                        # NOTE(review): this radio is rendered but not wired to any handler.
                        video_choise = gr.Radio(["background video", "background music"], value="background video", label="Type")
                    with gr.Column(scale=2):
                        gr.Markdown("Preview")
                        asset_preview_ui = gr.HTML(self.__get_first_preview)
                        delete_button = gr.Button("🗑️ Delete", scale=0, variant="primary")
                delete_button.click(self.__delete_clicked, [delete_button], [asset_dataframe_ui, asset_preview_ui, delete_button, AssetComponentsUtils.background_video_checkbox(), AssetComponentsUtils.background_music_checkbox()])
                asset_dataframe_ui.select(self.__preview_asset, [asset_dataframe_ui], [asset_preview_ui, delete_button])
            # Validation runs first; the asset is only added when validation succeeded.
            add_youtube_link.click(
                self.__verify_youtube_asset_inputs, [asset_name, youtube_url, asset_type], []
            ).success(
                self.__add_youtube_asset, [asset_name, youtube_url, asset_type],
                [asset_dataframe_ui, asset_preview_ui, delete_button, accordion, AssetComponentsUtils.background_video_checkbox(), AssetComponentsUtils.background_music_checkbox()]
            ).success(lambda: gr.update(open=False), None, [accordion])
            # The first step must only validate: the upload itself happens in the
            # `.success` step, otherwise the file would be moved twice.
            upload_button.click(
                self.__verify_local_asset_inputs, [upload_type, local_upload_name, video_upload, audio_upload, image_upload, ], []
            ).success(
                self.__upload_local_asset, [upload_type, local_upload_name, video_upload, audio_upload, image_upload, ],
                [asset_dataframe_ui, asset_preview_ui, delete_button, accordion, AssetComponentsUtils.background_video_checkbox(), AssetComponentsUtils.background_music_checkbox()]
            ).success(lambda: gr.update(open=False), None, [accordion])
        return asset_library_ui

    def __fulfill_df(self):
        '''Get the dataframe of assets'''
        return AssetDatabase.get_df()

    def __verify_youtube_asset_inputs(self, asset_name, yt_url, type):
        '''Validate the inputs of the YouTube flow without adding anything.

        Raises:
            gr.Error: if the name is invalid or already used, or the URL is not a YouTube URL.
        '''
        self.__validate_asset_name(asset_name)
        self.__validate_youtube_url(yt_url)

    def __validate_asset_name(self, asset_name):
        '''Validate asset name.

        Raises:
            gr.Error: if the name is empty, contains characters other than
                letters, digits, space, ``_`` or ``-``, or is already in the database.
        '''
        if not asset_name or not re.fullmatch(r"[A-Za-z0-9 _-]+", asset_name):
            raise gr.Error('Invalid asset name. Please provide a valid name that you will recognize (Only use letters and numbers)')
        if AssetDatabase.asset_exists(asset_name):
            raise gr.Error('An asset already exists with this name, please choose a different name.')

    def __validate_youtube_url(self, yt_url):
        '''Validate YouTube URL.

        Raises:
            gr.Error: if the URL is empty or does not start with a youtube.com https prefix.
        '''
        if not yt_url or not yt_url.startswith(("https://youtube.com/", "https://www.youtube.com/")):
            raise gr.Error('Invalid YouTube URL. Please provide a valid URL.')

    def __verify_and_add_youtube_asset(self, asset_name, yt_url, type):
        '''Verify and add a youtube asset to the database'''
        self.__validate_asset_name(asset_name)
        self.__validate_youtube_url(yt_url)
        return self.__add_youtube_asset(asset_name, yt_url, type)

    def __refresh_after_add(self):
        '''Build the six component updates returned after an asset was added.

        The order matches the outputs wired in ``create_ui``: dataframe,
        preview, delete button, accordion, background video choices,
        background music choices.
        '''
        latest_df = AssetDatabase.get_df()
        if len(latest_df) > 0:
            delete_label = f"🗑️ Delete {latest_df.iloc[0]['name']}"
        else:
            delete_label = "🗑️ Delete"
        # `gr.update` is used for every component (the per-component
        # `gr.DataFrame.update` classmethod is not available in Gradio 4+).
        return gr.update(value=latest_df), gr.update(value=self.__get_asset_embed(latest_df, 0)),\
            gr.update(value=delete_label),\
            gr.update(open=False),\
            gr.update(choices=AssetComponentsUtils.getBackgroundVideoChoices(), interactive=True),\
            gr.update(choices=AssetComponentsUtils.getBackgroundMusicChoices(), interactive=True)

    def __add_youtube_asset(self, asset_name, yt_url, type):
        '''Add a youtube asset and return the refreshed component updates.'''
        AssetDatabase.add_remote_asset(asset_name, AssetType(type), yt_url)
        return self.__refresh_after_add()

    def __get_first_preview(self):
        '''Get the preview of the first asset ('' when there is no asset).'''
        return self.__get_asset_embed(AssetDatabase.get_df(), 0)

    def __delete_clicked(self, button_name):
        '''Delete the asset named in the delete button label and refresh the UI.

        Nothing is removed when the label carries no asset name (plain "🗑️ Delete").
        '''
        prefix = self.__DELETE_PREFIX
        if button_name and button_name.startswith(prefix) and len(button_name) > len(prefix):
            AssetDatabase.remove_asset(button_name[len(prefix):])
        data = AssetDatabase.get_df()
        if len(data) > 0:
            preview_update = gr.update(value=self.__get_asset_embed(data, 0))
            button_update = gr.update(value=f"🗑️ Delete {data.iloc[0]['name']}")
        else:
            # No asset left: clear the stale preview and reset the button label.
            preview_update = gr.update(value="", visible=True)
            button_update = gr.update(value="🗑️ Delete")
        return gr.update(value=data),\
            preview_update,\
            button_update,\
            gr.update(choices=AssetComponentsUtils.getBackgroundVideoChoices(), interactive=True),\
            gr.update(choices=AssetComponentsUtils.getBackgroundMusicChoices(), interactive=True)

    def __preview_asset(self, data, evt: gr.SelectData):
        '''Preview the asset of the selected dataframe row'''
        selected_row = evt.index[0]
        html_embed = self.__get_asset_embed(data, selected_row)
        return gr.update(value=html_embed), gr.update(value=f"🗑️ Delete {data.iloc[selected_row]['name']}")

    def __get_asset_embed(self, data, row):
        '''Get the embed html for the asset at the given row.

        Returns '' when ``row`` is outside the dataframe or the link is
        neither a YouTube link nor a file under ``public/``.
        '''
        # NOTE(review): the markup literals below are empty in this revision, so the
        # size / MIME-type locals are computed but not interpolated -- confirm
        # whether the HTML templates were lost.
        embed_height = 300
        embed_width = 300
        if data is None or row >= len(data):
            return ''
        asset_link = data.iloc[row]['link']
        embed_html = ''
        if 'youtube.com' in asset_link:
            if '?v=' in asset_link:
                video_id = asset_link.split('?v=')[-1]
            else:
                # Drop trailing slashes first so the last path segment is the video id.
                video_id = asset_link.rstrip('/').split('/')[-1]
            asset_link = f"https://youtube.com/embed/{video_id}"
            embed_html = f''
        elif 'public/' in asset_link:
            asset_link = f"http://localhost:31415/gradio_api/file={asset_link}"
            file_ext = asset_link.split('.')[-1]
            # 'ogg' is listed for both audio and video; the audio branch wins.
            if file_ext in ['mp3', 'wav', 'ogg']:
                audio_type = 'audio/mpeg' if file_ext == 'mp3' else f'audio/{file_ext}'
                embed_html = f''
            elif file_ext in ['mp4', 'webm', 'ogg', 'mov']:
                video_type = 'video/mp4' if file_ext == 'mp4' else f'video/{file_ext}'
                embed_html = f''
            elif file_ext in ['jpg', 'jpeg', 'png', 'gif']:
                embed_html = f''
            else:
                embed_html = 'Unsupported file type'
        return embed_html

    @staticmethod
    def __clean_filename(filename):
        '''Remove the characters \\ / : * ? " < > | from the filename'''
        return re.sub('[\\\\/:*?"<>|]', '', filename)

    @staticmethod
    def __resolve_upload_path(upload_type, video_path, audio_path, image_path):
        '''Pick the uploaded file path that corresponds to the asset type.'''
        path_dict = {
            AssetType.VIDEO.value: video_path,
            AssetType.BACKGROUND_VIDEO.value: video_path,
            AssetType.AUDIO.value: audio_path,
            AssetType.BACKGROUND_MUSIC.value: audio_path,
            AssetType.IMAGE.value: image_path
        }
        return path_dict[upload_type]

    def __verify_local_asset_inputs(self, upload_type, upload_name, video_path, audio_path, image_path):
        '''Validate the inputs of the local upload flow without uploading.

        Raises:
            gr.Error: if the name is invalid / already used, or no existing
                file was provided for the selected asset type.
        '''
        self.__validate_asset_name(upload_name)
        source_path = self.__resolve_upload_path(upload_type, video_path, audio_path, image_path)
        # `source_path` is None when nothing was uploaded for the selected type.
        if not source_path or not os.path.exists(source_path):
            raise gr.Error('The file does not exist at the given path.')

    def __verify_and_upload_local_asset(self, upload_type, upload_name, video_path, audio_path, image_path):
        '''Verify and upload a local asset to the database'''
        self.__verify_local_asset_inputs(upload_type, upload_name, video_path, audio_path, image_path)
        return self.__upload_local_asset(upload_type, upload_name, video_path, audio_path, image_path)

    def __upload_local_asset(self, upload_type, upload_name, video_path, audio_path, image_path):
        '''Move the uploaded file under ``public/`` and register it in the database.'''
        source_path = self.__resolve_upload_path(upload_type, video_path, audio_path, image_path)
        new_path = "public/" + self.__clean_filename(upload_name) + "." + source_path.split(".")[-1]
        os.makedirs("public", exist_ok=True)
        shutil.move(source_path, new_path)
        AssetDatabase.add_local_asset(upload_name, AssetType(upload_type), new_path)
        return self.__refresh_after_add()
================================================
FILE: gui/ui_tab_config.py
================================================
import time
import gradio as gr
from gui.asset_components import AssetComponentsUtils
from gui.ui_abstract_component import AbstractComponentUI
from shortGPT.api_utils.eleven_api import ElevenLabsAPI
from shortGPT.config.api_db import ApiKeyManager
class ConfigUI(AbstractComponentUI):
    '''Gradio "Config" tab used to view and save the OpenAI, ElevenLabs,
    Pexels and Gemini API keys through ``ApiKeyManager``.'''

    def __init__(self):
        self.api_key_manager = ApiKeyManager()
        eleven_key = self.api_key_manager.get_api_key('ELEVENLABS_API_KEY')
        # Only build a client when a key is stored.
        self.eleven_labs_api = ElevenLabsAPI(eleven_key) if eleven_key else None

    def on_show(self, button_text, textbox=None, button=None):
        '''Show or hide the API key.

        Only ``button_text`` (the current button label) is wired as an input
        in ``create_ui``; ``textbox`` and ``button`` are unused and optional
        so the handler can be called with that single input.

        Returns:
            Updates for (textbox, button): plain text + "Hide" when the label
            was "Show", otherwise password + "Show".
        '''
        if button_text == "Show":
            return gr.update(type="text"), gr.update(value="Hide")
        return gr.update(type="password"), gr.update(value="Show")

    def verify_eleven_key(self, eleven_key, remaining_chars):
        '''Verify the ElevenLabs API key.

        When the key differs from the stored one, a client is created with it
        and the remaining character count is fetched; the client is kept only
        if that succeeds.

        Returns:
            The remaining characters for a new key, else ``remaining_chars`` unchanged.

        Raises:
            gr.Error: if the client cannot be created or queried.
        '''
        if eleven_key and self.api_key_manager.get_api_key('ELEVENLABS_API_KEY') != eleven_key:
            try:
                candidate_api = ElevenLabsAPI(eleven_key)
                remaining = candidate_api.get_remaining_characters()
            except Exception as e:
                # str(e) instead of e.args[0]: exceptions may carry no args.
                raise gr.Error(str(e)) from e
            self.eleven_labs_api = candidate_api
            return remaining
        return remaining_chars

    def save_keys(self, openai_key, eleven_key, pexels_key, gemini_key):
        '''Save the keys in the database.

        Every changed key is stored. When the ElevenLabs key changed, the
        voice choice components additionally receive the refreshed voice list.

        Returns:
            Six updates: the four key textboxes followed by the two voice
            choice components.
        '''
        manager = self.api_key_manager
        # Store these three before the ElevenLabs branch so none is skipped
        # when the ElevenLabs key also changed.
        for key_name, key_value in (("OPENAI_API_KEY", openai_key),
                                    ("PEXELS_API_KEY", pexels_key),
                                    ("GEMINI_API_KEY", gemini_key)):
            if manager.get_api_key(key_name) != key_value:
                manager.set_api_key(key_name, key_value)
        textbox_updates = (gr.update(value=openai_key),
                           gr.update(value=eleven_key),
                           gr.update(value=pexels_key),
                           gr.update(value=gemini_key))
        if manager.get_api_key('ELEVENLABS_API_KEY') != eleven_key:
            manager.set_api_key("ELEVENLABS_API_KEY", eleven_key)
            new_eleven_voices = AssetComponentsUtils.getElevenlabsVoices()
            return (*textbox_updates,
                    gr.update(choices=new_eleven_voices),
                    gr.update(choices=new_eleven_voices))
        return (*textbox_updates,
                gr.update(visible=True),
                gr.update(visible=True))

    def get_eleven_remaining(self,):
        '''Get the remaining characters from ElevenLabs API.

        Returns the error text instead of raising when the request fails, and
        "" when no ElevenLabs client is configured.
        '''
        if self.eleven_labs_api:
            try:
                return self.eleven_labs_api.get_remaining_characters()
            except Exception as e:
                return e.args[0] if e.args else str(e)
        return ""

    def back_to_normal(self):
        '''Back to normal after 3 seconds'''
        time.sleep(3)
        return gr.update(value="save")

    def create_ui(self):
        '''Create the config UI and wire its event handlers.

        Returns:
            The ``gr.Tab`` that contains the config form.
        '''
        with gr.Tab("Config") as config_ui:
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        openai_textbox = gr.Textbox(value=self.api_key_manager.get_api_key("OPENAI_API_KEY"), label="OPENAI API KEY", show_label=True, interactive=True, show_copy_button=True, type="password", scale=40)
                        show_openai_key = gr.Button("Show", size="sm", scale=1)
                        show_openai_key.click(self.on_show, [show_openai_key], [openai_textbox, show_openai_key])
                    with gr.Row():
                        eleven_labs_textbox = gr.Textbox(value=self.api_key_manager.get_api_key("ELEVENLABS_API_KEY"), label="ELEVENLABS_API_KEY", show_label=True, interactive=True, show_copy_button=True, type="password", scale=40)
                        eleven_characters_remaining = gr.Textbox(value=self.get_eleven_remaining(), label="CHARACTERS REMAINING", show_label=True, interactive=False, type="text", scale=40)
                        show_eleven_key = gr.Button("Show", size="sm", scale=1)
                        show_eleven_key.click(self.on_show, [show_eleven_key], [eleven_labs_textbox, show_eleven_key])
                    with gr.Row():
                        pexels_textbox = gr.Textbox(value=self.api_key_manager.get_api_key("PEXELS_API_KEY"), label="PEXELS KEY", show_label=True, interactive=True, show_copy_button=True, type="password", scale=40)
                        show_pexels_key = gr.Button("Show", size="sm", scale=1)
                        show_pexels_key.click(self.on_show, [show_pexels_key], [pexels_textbox, show_pexels_key])
                    with gr.Row():
                        gemini_textbox = gr.Textbox(value=self.api_key_manager.get_api_key("GEMINI_API_KEY"), label="GEMINI API KEY", show_label=True, interactive=True, show_copy_button=True, type="password", scale=40)
                        show_gemini_key = gr.Button("Show", size="sm", scale=1)
                        show_gemini_key.click(self.on_show, [show_gemini_key], [gemini_textbox, show_gemini_key])
                    save_button = gr.Button("save", size="sm", scale=1)
                    # Keys are saved only when the ElevenLabs key verification succeeded.
                    save_button.click(self.verify_eleven_key, [eleven_labs_textbox, eleven_characters_remaining], [eleven_characters_remaining]).success(
                        self.save_keys, [openai_textbox, eleven_labs_textbox, pexels_textbox, gemini_textbox], [openai_textbox, eleven_labs_textbox, pexels_textbox, gemini_textbox, AssetComponentsUtils.voiceChoice(), AssetComponentsUtils.voiceChoiceTranslation()])
                    # No inputs are wired, so the handler must accept zero arguments.
                    save_button.click(lambda *_: gr.update(value="Keys Saved !"), [], [save_button])
                    save_button.click(self.back_to_normal, [], [save_button])
        return config_ui
================================================
FILE: gui/ui_tab_short_automation.py
================================================
import os
import time
import traceback
import gradio as gr
from gui.asset_components import AssetComponentsUtils
from gui.ui_abstract_component import AbstractComponentUI
from gui.ui_components_html import GradioComponentsHTML
from shortGPT.audio.edge_voice_module import EdgeTTSVoiceModule
from shortGPT.audio.eleven_voice_module import ElevenLabsVoiceModule
from shortGPT.config.api_db import ApiKeyManager
from shortGPT.config.languages import (EDGE_TTS_VOICENAME_MAPPING,
ELEVEN_SUPPORTED_LANGUAGES,
LANGUAGE_ACRONYM_MAPPING,
Language)
from shortGPT.engine.facts_short_engine import FactsShortEngine
from shortGPT.engine.reddit_short_engine import RedditShortEngine
class ShortAutomationUI(AbstractComponentUI):
    '''Gradio panel that generates batches of shorts (Reddit stories or facts)
    with ``RedditShortEngine`` / ``FactsShortEngine``.'''

    def __init__(self, shortGptUI: gr.Blocks):
        self.shortGptUI = shortGptUI
        # Accumulated HTML of every short generated so far.
        # NOTE(review): the markup literals in this class contain only a newline
        # in this revision -- confirm whether the HTML was lost.
        self.embedHTML = '\n'
        # Number of engine steps completed in the current create_short run.
        self.progress_counter = 0
        self.short_automation = None

    def create_ui(self):
        '''Create the short automation UI and wire its event handlers.

        Returns:
            The (initially hidden) ``gr.Row`` that contains the panel.
        '''
        with gr.Row(visible=False) as short_automation:
            with gr.Column():
                numShorts = gr.Number(label="Number of shorts", minimum=1, value=1)
                short_type = gr.Radio(["Reddit Story shorts", "Historical Facts shorts", "Scientific Facts shorts", "Custom Facts shorts"], label="Type of shorts generated", value="Reddit Story shorts", interactive=True)
                facts_subject = gr.Textbox(label="Write a subject for your facts (example: Football facts)", interactive=True, visible=False)
                short_type.change(lambda x: gr.update(visible=x == "Custom Facts shorts"), [short_type], [facts_subject])
                tts_engine = gr.Radio([AssetComponentsUtils.ELEVEN_TTS, AssetComponentsUtils.EDGE_TTS], label="Text to speech engine", value=AssetComponentsUtils.EDGE_TTS, interactive=True)
                # Mirrored on the instance so inspect_create_inputs can read it.
                self.tts_engine = tts_engine.value
                with gr.Column(visible=False) as eleven_tts:
                    language_eleven = gr.Radio([lang.value for lang in ELEVEN_SUPPORTED_LANGUAGES], label="Language", value="English", interactive=True)
                    voice_eleven = AssetComponentsUtils.voiceChoice(provider=AssetComponentsUtils.ELEVEN_TTS)
                with gr.Column(visible=True) as edge_tts:
                    language_edge = gr.Dropdown([lang.value.upper() for lang in Language], label="Language", value="ENGLISH", interactive=True)

                def tts_engine_change(x):
                    self.tts_engine = x
                    return gr.update(visible=x == AssetComponentsUtils.ELEVEN_TTS), gr.update(visible=x == AssetComponentsUtils.EDGE_TTS)
                tts_engine.change(tts_engine_change, tts_engine, [eleven_tts, edge_tts])
                useImages = gr.Checkbox(label="Use images", value=True)
                numImages = gr.Radio([5, 10, 25], value=10, label="Number of images per short", visible=True, interactive=True)
                useImages.change(lambda x: gr.update(visible=x), useImages, numImages)
                addWatermark = gr.Checkbox(label="Add watermark")
                watermark = gr.Textbox(label="Watermark (your channel name)", visible=False)
                addWatermark.change(lambda x: gr.update(visible=x), [addWatermark], [watermark])
                AssetComponentsUtils.background_video_checkbox()
                AssetComponentsUtils.background_music_checkbox()
                createButton = gr.Button("Create Shorts")
                generation_error = gr.HTML(visible=False)
                video_folder = gr.Button("📁", visible=True)
                output = gr.HTML('')
            # No inputs are wired, so the handler must accept zero arguments.
            video_folder.click(lambda *_: AssetComponentsUtils.start_file(os.path.abspath("videos/")))
            # Inputs are validated first; generation starts only when validation succeeded.
            createButton.click(self.inspect_create_inputs, inputs=[AssetComponentsUtils.background_video_checkbox(), AssetComponentsUtils.background_music_checkbox(), watermark, short_type, facts_subject], outputs=[generation_error]).success(self.create_short, inputs=[
                numShorts,
                short_type,
                tts_engine,
                language_eleven,
                language_edge,
                numImages,
                watermark,
                AssetComponentsUtils.background_video_checkbox(),
                AssetComponentsUtils.background_music_checkbox(),
                facts_subject,
                voice_eleven,
            ], outputs=[output, video_folder, generation_error])
        self.short_automation = short_automation
        return self.short_automation

    def create_short(self, numShorts, short_type, tts_engine, language_eleven, language_edge, numImages, watermark, background_video_list, background_music_list, facts_subject, voice_eleven, progress=gr.Progress()):
        '''Create ``numShorts`` shorts, yielding the UI state after each one.

        Yields:
            (output html, video folder button update, error panel update).
            Any exception is rendered into the error panel instead of being raised.
        '''
        try:
            # Restart progress tracking for every run so the fraction stays in [0, 1].
            self.progress_counter = 0
            numShorts = int(numShorts)
            numImages = int(numImages) if numImages else None
            # Repeat the selected backgrounds so there is one per short.
            background_videos = (background_video_list * ((numShorts // len(background_video_list)) + 1))[:numShorts]
            background_musics = (background_music_list * ((numShorts // len(background_music_list)) + 1))[:numShorts]
            if tts_engine == AssetComponentsUtils.ELEVEN_TTS:
                language = Language(language_eleven.lower().capitalize())
                voice_module = ElevenLabsVoiceModule(ApiKeyManager.get_api_key('ELEVENLABS_API_KEY'), voice_eleven, checkElevenCredits=True)
            elif tts_engine == AssetComponentsUtils.EDGE_TTS:
                language = Language(language_edge.lower().capitalize())
                voice_module = EdgeTTSVoiceModule(EDGE_TTS_VOICENAME_MAPPING[language]['male'])
            else:
                # Without this branch `language` / `voice_module` would be unbound below.
                raise ValueError(f"Unsupported text to speech engine: {tts_engine}")
            for i in range(numShorts):
                shortEngine = self.create_short_engine(short_type=short_type, voice_module=voice_module, language=language, numImages=numImages, watermark=watermark,
                                                       background_video=background_videos[i], background_music=background_musics[i], facts_subject=facts_subject)
                num_steps = shortEngine.get_total_steps()

                # Defaults bind the current loop values to this short's logger.
                def logger(prog_str, short_index=i, total_steps=num_steps):
                    progress(self.progress_counter / (total_steps * numShorts), f"Making short {short_index+1}/{numShorts} - {prog_str}")
                shortEngine.set_logger(logger)
                for step_num, step_info in shortEngine.makeContent():
                    print(step_num, step_info, self.progress_counter)
                    progress(self.progress_counter / (num_steps * numShorts), f"Making short {i+1}/{numShorts} - {step_info}")
                    self.progress_counter += 1
                video_path = shortEngine.get_video_output_path()
                current_url = self.shortGptUI.share_url+"/" if self.shortGptUI.share else self.shortGptUI.local_url
                # NOTE(review): file_url_path / file_name are not interpolated by the
                # literal below as visible here.
                file_url_path = f"{current_url}gradio_api/file={video_path}"
                file_name = video_path.split("/")[-1].split("\\")[-1]
                self.embedHTML += f'''
'''
                yield self.embedHTML + '\n', gr.update(visible=True), gr.update(visible=False)
        except Exception as e:
            traceback_str = ''.join(traceback.format_tb(e.__traceback__))
            # Exceptions may carry no args; fall back to str(e).
            error_detail = e.args[0] if e.args else str(e)
            error_name = type(e).__name__.capitalize() + " : " + f"{error_detail}"
            print("Error", traceback_str)
            error_html = GradioComponentsHTML.get_html_error_template().format(error_message=error_name, stack_trace=traceback_str)
            yield self.embedHTML + '', gr.update(visible=True), gr.update(value=error_html, visible=True)

    def inspect_create_inputs(self, background_video_list, background_music_list, watermark, short_type, facts_subject, progress=gr.Progress()):
        '''Validate the form before generation starts.

        Returns:
            An update hiding the error panel when everything is valid.

        Raises:
            gr.Error: on a missing subject, background, API key or an invalid watermark.
        '''
        if short_type == "Custom Facts shorts":
            if not facts_subject:
                raise gr.Error("Please write down your facts short's subject")
        if not background_video_list:
            raise gr.Error("Please select at least one background video.")
        if not background_music_list:
            raise gr.Error("Please select at least one background music.")
        if watermark != "":
            if not watermark.replace(" ", "").isalnum():
                raise gr.Error("Watermark should only contain letters and numbers.")
            if len(watermark) > 25:
                raise gr.Error("Watermark should not exceed 25 characters.")
            if len(watermark) < 3:
                raise gr.Error("Watermark should be at least 3 characters long.")
        openai_key = ApiKeyManager.get_api_key("OPENAI_API_KEY")
        gemini_key = ApiKeyManager.get_api_key("GEMINI_API_KEY")
        if not openai_key and not gemini_key:
            raise gr.Error("GEMINI OR OPENAI API key is missing. Please go to the config tab and enter the API key.")
        eleven_labs_key = ApiKeyManager.get_api_key("ELEVENLABS_API_KEY")
        if self.tts_engine == AssetComponentsUtils.ELEVEN_TTS and not eleven_labs_key:
            raise gr.Error("ELEVENLABS_API_KEY API key is missing. Please go to the config tab and enter the API key.")
        return gr.update(visible=False)

    def create_short_engine(self, short_type, voice_module, language, numImages, watermark, background_video, background_music, facts_subject):
        '''Build the engine matching ``short_type``.

        For facts shorts the subject is ``facts_subject`` for the custom type
        and the short type label itself otherwise.

        Raises:
            gr.Error: if no engine matches ``short_type``.
        '''
        if short_type == "Reddit Story shorts":
            return RedditShortEngine(voice_module, background_video_name=background_video, background_music_name=background_music, num_images=numImages, watermark=watermark, language=language)
        normalized_type = short_type.lower()
        if "fact" in normalized_type:
            facts_type = facts_subject if "custom" in normalized_type else short_type
            return FactsShortEngine(voice_module, facts_type=facts_type, background_video_name=background_video, background_music_name=background_music, num_images=numImages, watermark=watermark, language=language)
        raise gr.Error(f"Short type does not have a valid short engine: {short_type}")
================================================
FILE: gui/ui_tab_video_automation.py
================================================
import os
import traceback
from enum import Enum
import gradio as gr
from gui.asset_components import AssetComponentsUtils
from gui.ui_abstract_component import AbstractComponentUI
from gui.ui_components_html import GradioComponentsHTML
from shortGPT.audio.edge_voice_module import EdgeTTSVoiceModule
from shortGPT.audio.eleven_voice_module import ElevenLabsVoiceModule
from shortGPT.config.api_db import ApiKeyManager
from shortGPT.config.languages import (EDGE_TTS_VOICENAME_MAPPING,
ELEVEN_SUPPORTED_LANGUAGES,
LANGUAGE_ACRONYM_MAPPING,
Language)
from shortGPT.engine.content_video_engine import ContentVideoEngine
from shortGPT.gpt import gpt_chat_video
class Chatstate(Enum):
"""Steps of the chat-driven video creation flow, stored in VideoAutomationUI.state."""
# Initial state: the reply is checked for "vertical"/"short" to pick the format.
ASK_ORIENTATION = 1
# The reply selects the voice module (e.g. "elevenlabs").
ASK_VOICE_MODULE = 2
# The remaining states are not handled in the visible part of the file;
# presumably they follow the order suggested by their names -- TODO confirm.
ASK_LANGUAGE = 3
ASK_DESCRIPTION = 4
GENERATE_SCRIPT = 5
ASK_SATISFACTION = 6
MAKE_VIDEO = 7
ASK_CORRECTION = 8
class VideoAutomationUI(AbstractComponentUI):
def __init__(self, shortGptUI: gr.Blocks):
    """Store the parent Blocks app and reset all conversation and UI state."""
    self.shortGptUI = shortGptUI
    # Conversation state: starts by asking for the video orientation.
    self.state = Chatstate.ASK_ORIENTATION
    self.isVertical = self.voice_module = self.language = None
    self.script = ""
    # Rendered result state.
    self.video_html = ""
    self.videoVisible = False
    # Component handles; all unset until assigned elsewhere.
    self.video_automation = self.chatbot = self.msg = None
    self.restart_button = self.video_folder = None
    self.errorHTML = self.outHTML = None
def is_key_missing(self):
    """Return a user-facing message naming the first missing API key.

    An LLM key (Gemini or OpenAI, either one is enough) and a Pexels key are
    required.

    Returns:
        The error message, or None when no required key is missing.
    """
    openai_key = ApiKeyManager.get_api_key("OPENAI_API_KEY")
    gemini_key = ApiKeyManager.get_api_key("GEMINI_API_KEY")
    if not openai_key and not gemini_key:
        return "Your Gemini or OpenAI key is missing. Please go to the config tab and enter the API key."
    pexels_api_key = ApiKeyManager.get_api_key("PEXELS_API_KEY")
    if not pexels_api_key:
        return "Your Pexels API key is missing. Please go to the config tab and enter the API key."
    return None
def generate_script(self, message, language):
    """Return the script produced by ``gpt_chat_video.generateScript`` for the user's description."""
    script = gpt_chat_video.generateScript(message, language)
    return script
def correct_script(self, script, correction):
    """Return ``script`` revised by ``gpt_chat_video.correctScript`` according to ``correction``."""
    revised = gpt_chat_video.correctScript(script, correction)
    return revised
def make_video(self, script, voice_module, isVertical, progress):
    """Run a ``ContentVideoEngine`` to completion and return the output video path.

    Args:
        script: Script text handed to the engine.
        voice_module: Voice module instance handed to the engine.
        isVertical: Passed to the engine as ``isVerticalFormat``.
        progress: Callable taking ``(fraction, description)``; called once per
            engine step and whenever the engine logs.
    """
    engine = ContentVideoEngine(voiceModule=voice_module, script=script, isVerticalFormat=isVertical)
    total_steps = engine.get_total_steps()
    completed = 0

    def report(prog_str):
        # Reads the enclosing counter at call time, so log lines emitted
        # mid-step carry the number of steps finished so far.
        progress(completed / (total_steps), f"Creating video - {completed} - {prog_str}")

    engine.set_logger(report)
    for _step_num, step_info in engine.makeContent():
        progress(completed / (total_steps), f"Creating video - {step_info}")
        completed += 1
    return engine.get_video_output_path()
def reset_components(self):
    """Return the updates that put the chat widgets back in their starting state.

    The four updates are ordered to match the outputs wired to the restart
    button: chatbot, message box, error HTML, video HTML.
    """
    chatbot_update = gr.update(value=self.initialize_conversation())
    message_box_update = gr.update(visible=True)
    error_update = gr.update(value="", visible=False)
    video_update = gr.update(value="", visible=False)
    return chatbot_update, message_box_update, error_update, video_update
def chatbot_conversation(self):
    """Build the submit handler that drives the chat state machine.

    Returns:
        A generator function ``respond(message, chat_history, progress)``.
        Every value it yields is a 6-tuple of ``gr.update`` objects for, in
        order: message box, chatbot, video HTML, error HTML, folder button,
        restart button.
    """

    def respond(message, chat_history, progress=gr.Progress()):
        error_html = ""
        errorVisible = False
        inputVisible = True
        folderVisible = False
        if self.state == Chatstate.ASK_ORIENTATION:
            errorMessage = self.is_key_missing()
            if errorMessage:
                bot_message = errorMessage
            else:
                self.isVertical = "vertical" in message.lower() or "short" in message.lower()
                self.state = Chatstate.ASK_VOICE_MODULE
                bot_message = "Which voice module do you want to use? Please type 'ElevenLabs' for high quality, 'EdgeTTS' for free medium quality voice."
        elif self.state == Chatstate.ASK_VOICE_MODULE:
            # This is a generator: a bare `return` here would end it without
            # yielding, and the user would never see the message. Invalid
            # answers therefore fall through to the common append/yield below
            # while the state stays at ASK_VOICE_MODULE.
            language_choices = None
            if "elevenlabs" in message.lower():
                eleven_labs_key = ApiKeyManager.get_api_key("ELEVENLABS_API_KEY")
                if eleven_labs_key:
                    self.voice_module = ElevenLabsVoiceModule
                    language_choices = [lang.value for lang in ELEVEN_SUPPORTED_LANGUAGES]
                else:
                    bot_message = "Your ELEVENLABS_API_KEY API key is missing. Please go to the config tab and enter the API key."
            elif "edgetts" in message.lower():
                self.voice_module = EdgeTTSVoiceModule
                language_choices = [lang.value for lang in Language]
            else:
                bot_message = "Invalid voice module. Please type 'ElevenLabs' or 'EdgeTTS'."
            if language_choices is not None:
                self.state = Chatstate.ASK_LANGUAGE
                bot_message = f"🌐What language will be used in the video?🌐 Choose from one of these ({', '.join(language_choices)})"
        elif self.state == Chatstate.ASK_LANGUAGE:
            # First language whose name appears in the reply; English otherwise.
            self.language = next((lang for lang in Language if lang.value.lower() in message.lower()), None)
            self.language = self.language if self.language else Language.ENGLISH
            # Until now self.voice_module held the chosen class; replace it
            # with a configured instance.
            if self.voice_module == ElevenLabsVoiceModule:
                self.voice_module = ElevenLabsVoiceModule(ApiKeyManager.get_api_key('ELEVENLABS_API_KEY'), "Chris", checkElevenCredits=True)
            elif self.voice_module == EdgeTTSVoiceModule:
                self.voice_module = EdgeTTSVoiceModule(EDGE_TTS_VOICENAME_MAPPING[self.language]['male'])
            self.state = Chatstate.ASK_DESCRIPTION
            bot_message = "Amazing 🔥 ! 📝Can you describe thoroughly the subject of your video?📝 I will next generate you a script based on that description"
        elif self.state == Chatstate.ASK_DESCRIPTION:
            self.script = self.generate_script(message, self.language.value)
            self.state = Chatstate.ASK_SATISFACTION
            bot_message = f"📝 Here is your generated script: \n\n--------------\n{self.script}\n\n・Are you satisfied with the script and ready to proceed with creating the video? Please respond with 'YES' or 'NO'. 👍👎"
        elif self.state == Chatstate.ASK_SATISFACTION:
            if "yes" in message.lower():
                self.state = Chatstate.MAKE_VIDEO
                inputVisible = False
                # Intermediate update: hide the input and restart button while rendering.
                yield gr.update(visible=False), gr.update(value=[[None, "Your video is being made now! 🎬"]]), gr.update(value="", visible=False), gr.update(value=error_html, visible=errorVisible), gr.update(visible=folderVisible), gr.update(visible=False)
                try:
                    video_path = self.make_video(self.script, self.voice_module, self.isVertical, progress=progress)
                    # presumably interpolated into the video HTML below — TODO confirm
                    file_name = video_path.split("/")[-1].split("\\")[-1]
                    current_url = self.shortGptUI.share_url+"/" if self.shortGptUI.share else self.shortGptUI.local_url
                    file_url_path = f"{current_url}gradio_api/file={video_path}"
                    self.video_html = f'''
'''
                    self.videoVisible = True
                    folderVisible = True
                    bot_message = "Your video is completed !🎬. Scroll down below to open its file location."
                except Exception as e:
                    traceback_str = ''.join(traceback.format_tb(e.__traceback__))
                    # Exceptions raised without arguments have empty args;
                    # indexing them would crash the error handler itself.
                    error_detail = e.args[0] if e.args else ""
                    error_name = type(e).__name__.capitalize() + " : " + f"{error_detail}"
                    errorVisible = True
                    gradio_content_automation_ui_error_template = GradioComponentsHTML.get_html_error_template()
                    error_html = gradio_content_automation_ui_error_template.format(error_message=error_name, stack_trace=traceback_str)
                    bot_message = "We encountered an error while making this video ❌"
                    print("Error", traceback_str)
                    yield gr.update(visible=False), gr.update(value=[[None, "Your video is being made now! 🎬"]]), gr.update(value="", visible=False), gr.update(value=error_html, visible=errorVisible), gr.update(visible=folderVisible), gr.update(visible=True)
            else:
                self.state = Chatstate.ASK_CORRECTION
                bot_message = "Explain me what you want different in the script"
        elif self.state == Chatstate.ASK_CORRECTION:
            self.script = self.correct_script(self.script, message)
            # Go back to asking for approval of the corrected script.
            self.state = Chatstate.ASK_SATISFACTION
            bot_message = f"📝 Here is your corrected script: \n\n--------------\n{self.script}\n\n・Are you satisfied with the script and ready to proceed with creating the video? Please respond with 'YES' or 'NO'. 👍👎"
        chat_history.append((message, bot_message))
        yield gr.update(value="", visible=inputVisible), gr.update(value=chat_history), gr.update(value=self.video_html, visible=self.videoVisible), gr.update(value=error_html, visible=errorVisible), gr.update(visible=folderVisible), gr.update(visible=True)

    return respond
def initialize_conversation(self):
    """Reset the conversation fields and return the opening chat history.

    The return value is a one-entry history: no user message, plus the bot's
    welcome text asking for the video orientation.
    """
    self.state = Chatstate.ASK_ORIENTATION
    self.isVertical = self.language = None
    self.script = self.video_html = ""
    self.videoVisible = False
    welcome = "🤖 Welcome to ShortGPT! 🚀 I'm a python framework aiming to simplify and automate your video editing tasks.\nLet's get started! 🎥🎬\n\n Do you want your video to be in landscape or vertical format? (landscape OR vertical)"
    return [[None, welcome]]
def reset_conversation(self):
    """Put the conversation fields back to their initial values (voice_module is left untouched)."""
    self.state = Chatstate.ASK_ORIENTATION
    self.isVertical = self.language = None
    self.script = self.video_html = ""
    self.videoVisible = False
def create_ui(self):
# Build the chat tab's components, wire their events, and return the
# (initially hidden) container row.
with gr.Row(visible=False) as self.video_automation:
with gr.Column():
# The chatbot takes initialize_conversation as its value, so the opening
# history comes from that method.
self.chatbot = gr.Chatbot(self.initialize_conversation, height=365)
self.msg = gr.Textbox()
self.restart_button = gr.Button("Restart")
# Hidden until the chat handler makes it visible after a successful render.
self.video_folder = gr.Button("📁", visible=False)
self.video_folder.click(lambda _: AssetComponentsUtils.start_file(os.path.abspath("videos/")))
respond = self.chatbot_conversation()
self.errorHTML = gr.HTML(visible=False)
self.outHTML = gr.HTML('')
# Restart has two handlers: the first returns updates for the four listed
# components, the second resets the conversation fields.
self.restart_button.click(self.reset_components, [], [self.chatbot, self.msg, self.errorHTML, self.outHTML])
self.restart_button.click(self.reset_conversation, [])
# The order of this output list must match the tuples yielded by the
# handler returned from chatbot_conversation().
self.msg.submit(respond, [self.msg, self.chatbot], [self.msg, self.chatbot, self.outHTML, self.errorHTML, self.video_folder, self.restart_button])
return self.video_automation
================================================
FILE: gui/ui_tab_video_translation.py
================================================
import os
import time
import traceback
import gradio as gr
from gui.asset_components import AssetComponentsUtils
from gui.ui_abstract_component import AbstractComponentUI
from gui.ui_components_html import GradioComponentsHTML
from shortGPT.audio.edge_voice_module import EdgeTTSVoiceModule
from shortGPT.audio.eleven_voice_module import ElevenLabsVoiceModule
from shortGPT.config.api_db import ApiKeyManager
from shortGPT.config.languages import (EDGE_TTS_VOICENAME_MAPPING,
ELEVEN_SUPPORTED_LANGUAGES,
LANGUAGE_ACRONYM_MAPPING,
Language)
from shortGPT.engine.multi_language_translation_engine import MultiLanguageTranslationEngine
class VideoTranslationUI(AbstractComponentUI):
def __init__(self, shortGptUI: gr.Blocks):
self.shortGptUI = shortGptUI
self.eleven_language_choices = [lang.value.upper() for lang in ELEVEN_SUPPORTED_LANGUAGES]
self.embedHTML = '
'
self.progress_counter = 0
self.video_translation_ui = None
def create_ui(self):
# Build the video-translation tab, wire its events, and return the
# (initially hidden) container row.
with gr.Row(visible=False) as video_translation_ui:
with gr.Column():
# Source selection: the radio toggles which of the two inputs is visible.
videoType = gr.Radio(["Youtube link", "Video file"], label="Input your video", value="Youtube link", interactive=True)
video_path = gr.Video(sources="upload", interactive=True, width=533.33, height=300, visible=False)
# NOTE(review): the radio defaults to "Youtube link" but this textbox starts
# hidden, so neither input is shown until the radio changes — confirm intended.
yt_link = gr.Textbox(label="Youtube link (https://youtube.com/xyz): ", interactive=True, visible=False)
videoType.change(lambda x: (gr.update(visible=x == "Video file"), gr.update(visible=x == "Youtube link")), [videoType], [video_path, yt_link])
# TTS engine selection: each engine has its own language picker column,
# and only the selected engine's column is visible.
tts_engine = gr.Radio([AssetComponentsUtils.ELEVEN_TTS, AssetComponentsUtils.EDGE_TTS], label="Text to speech engine", value=AssetComponentsUtils.EDGE_TTS, interactive=True)
with gr.Column(visible=False) as eleven_tts:
language_eleven = gr.CheckboxGroup(self.eleven_language_choices, label="Language", value="ENGLISH", interactive=True)
voice_eleven = AssetComponentsUtils.voiceChoiceTranslation(provider=AssetComponentsUtils.ELEVEN_TTS)
with gr.Column(visible=True) as edge_tts:
language_edge = gr.CheckboxGroup([lang.value.upper() for lang in Language], label="Language", value="ENGLISH", interactive=True)
tts_engine.change(lambda x: (gr.update(visible=x == AssetComponentsUtils.ELEVEN_TTS), gr.update(visible=x == AssetComponentsUtils.EDGE_TTS)), [tts_engine], [eleven_tts, edge_tts])
useCaptions = gr.Checkbox(label="Caption video", value=False)
translateButton = gr.Button("Translate Video")
generation_error = gr.HTML(visible=False)
video_folder = gr.Button("📁", visible=True)
output = gr.HTML('')
video_folder.click(lambda _: AssetComponentsUtils.start_file(os.path.abspath("videos/")))
# Validation runs first; translate_video is chained with .success(), so it
# only runs when inspect_create_inputs did not raise.
# The two input lists order video_path and yt_link differently, matching
# each method's own parameter order.
translateButton.click(self.inspect_create_inputs, inputs=[videoType, video_path, yt_link, tts_engine, language_eleven, language_edge, ], outputs=[generation_error]).success(self.translate_video, inputs=[
videoType, yt_link, video_path, tts_engine, language_eleven, language_edge, useCaptions, voice_eleven
], outputs=[output, video_folder, generation_error])
self.video_translation_ui = video_translation_ui
return self.video_translation_ui
def translate_video(self, videoType, yt_link, video_path, tts_engine, language_eleven, language_edge, use_captions: bool, voice_eleven: str, progress=gr.Progress()) -> str:
if tts_engine == AssetComponentsUtils.ELEVEN_TTS:
languages = [Language(lang.lower().capitalize()) for lang in language_eleven]
elif tts_engine == AssetComponentsUtils.EDGE_TTS:
languages = [Language(lang.lower().capitalize()) for lang in language_edge]
try:
for i, language in enumerate(languages):
if tts_engine == AssetComponentsUtils.EDGE_TTS:
voice_module = EdgeTTSVoiceModule(EDGE_TTS_VOICENAME_MAPPING[language]['male'])
if tts_engine == AssetComponentsUtils.ELEVEN_TTS:
voice_module = ElevenLabsVoiceModule(ApiKeyManager.get_api_key('ELEVENLABS_API_KEY'), voice_eleven, checkElevenCredits=True)
content_translation_engine = MultiLanguageTranslationEngine(voiceModule=voice_module, src_url=yt_link if videoType == "Youtube link" else video_path, target_language=language, use_captions=use_captions)
num_steps = content_translation_engine.get_total_steps()
def logger(prog_str):
progress(self.progress_counter / (num_steps), f"Translating your video ({i+1}/{len(languages)}) - {prog_str}")
content_translation_engine.set_logger(logger)
for step_num, step_info in content_translation_engine.makeContent():
progress(self.progress_counter / (num_steps), f"Translating your video ({i+1}/{len(languages)}) - {step_info}")
self.progress_counter += 1
video_path = content_translation_engine.get_video_output_path()
current_url = self.shortGptUI.share_url+"/" if self.shortGptUI.share else self.shortGptUI.local_url
file_url_path = f"{current_url}gradio_api/file={video_path}"
file_name = video_path.split("/")[-1].split("\\")[-1]
self.embedHTML += f'''
', gr.update(visible=True), gr.update(value=error_html, visible=True)
def inspect_create_inputs(self, videoType, video_path, yt_link, tts_engine, language_eleven, language_edge,):
    """Validate the translation form before the job starts.

    Raises:
        gr.Error: If the YouTube link does not start with an accepted prefix,
            the uploaded file is missing or has an unsupported extension, or
            no target language is selected for the chosen TTS engine.

    Returns:
        A ``gr.update(visible=False)``, which the caller applies to the error panel.
    """
    supported_extensions = ['.mp4', '.avi', '.mov']  # Add more supported video extensions if needed
    youtube_prefixes = ("https://youtube.com/", "https://www.youtube.com/")
    print(videoType, video_path, yt_link)

    if videoType == "Youtube link":
        if not yt_link.startswith(youtube_prefixes):
            raise gr.Error('Invalid YouTube URL. Please provide a valid URL. Link example: https://www.youtube.com/watch?v=dQw4w9WgXcQ')
    else:
        if not (video_path and os.path.exists(video_path)):
            raise gr.Error('You must drag and drop a valid video file.')
        extension = os.path.splitext(video_path)[-1].lower()
        if extension not in supported_extensions:
            raise gr.Error('Invalid video file. Supported video file extensions are: {}'.format(', '.join(supported_extensions)))

    # Only the language list that belongs to the selected engine is checked.
    if tts_engine == AssetComponentsUtils.ELEVEN_TTS and len(language_eleven) == 0:
        raise gr.Error('You must select one or more target languages')
    if tts_engine == AssetComponentsUtils.EDGE_TTS and len(language_edge) == 0:
        raise gr.Error('You must select one or more target languages')
    return gr.update(visible=False)
def update_progress(progress, progress_counter, num_steps, num_shorts, stop_event):
    """Report rendering progress every 0.1 s until ``stop_event`` is set.

    Args:
        progress: Callable taking ``(fraction, description)``.
        progress_counter: Numerator of the reported fraction.
        num_steps: With ``num_shorts``, forms the denominator of the fraction.
        num_shorts: See ``num_steps``.
        stop_event: Object with ``is_set()``; the loop ends once it returns true.
    """
    began = time.time()
    while True:
        if stop_event.is_set():
            break
        seconds_elapsed = time.time() - began
        # The displayed counter grows linearly with time and would reach 3649 after 600 seconds.
        shown = int(3649 * seconds_elapsed / 600)
        progress(progress_counter / (num_steps * num_shorts), f"Rendering progress - {shown}/3649")
        time.sleep(0.1)  # update every 0.1 second
================================================
FILE: installation-notes.md
================================================
Thanks to Son Tran for the fixes to the installation guide. Here are the recommended steps for installing ShortGPT:
### You now need Docker to run ShortGPT. If you can't run it with Docker, please use Google Colab.
# To run ShortGPT docker:
First make a .env file with the API keys like this:
```bash
GEMINI_API_KEY=put_your_gemini_api_key_here
OPENAI_API_KEY=sk-_put_your_openai_api_key_here
ELEVENLABS_API_KEY=put_your_eleven_labs_api_key_here
PEXELS_API_KEY=put_your_pexels_api_key_here
```
To run Dockerfile do this:
```bash
docker build -t short_gpt_docker:latest .
docker run -p 31415:31415 --env-file .env short_gpt_docker:latest
```
Export Docker image:
```bash
docker save short_gpt_docker > short_gpt_docker.tar
```
### Here are the steps to install it from scratch on Linux, Debian 11 x64:
In short, you need to use:
- Python 3.10
- openai package, then upgrade openai-whisper
- ffmpeg 4.2.3
### 1. OS: Debian 11 x64
```bash
sudo apt update && sudo apt upgrade
sudo apt install wget git libltdl-dev libjpeg-dev libpng-dev libtiff-dev libgif-dev libfreetype6-dev liblcms2-dev libxml2-dev wget build-essential libncursesw5-dev libssl-dev libsqlite3-dev tk-dev libgdbm-dev libc6-dev libbz2-dev libffi-dev zlib1g-dev
```
### 2. Install Python version: 3.10.3
```bash
wget https://www.python.org/ftp/python/3.10.3/Python-3.10.3.tgz
tar xzf Python-3.10.3.tgz
cd Python-3.10.3
./configure --enable-optimizations
make install
```
To check the Python version, use this command:
```bash
python3.10 -V
```
To use pip, use this command:
```bash
pip3.10 install
```
### 3. Install ffmpeg version: 4.2.3
ShortGPT will accept this version of FFmpeg:
3.1. Install Build Dependencies:
```bash
sudo apt update
sudo apt build-dep ffmpeg
```
3.2. Clone FFmpeg Source Code:
```bash
git clone https://git.ffmpeg.org/ffmpeg.git
cd ffmpeg
git checkout n4.2.3
```
3.3. Configure FFmpeg Build:
```bash
./configure --enable-gpl --enable-version3 --enable-sdl2 --enable-fontconfig --enable-gnutls --enable-iconv --enable-libass --enable-libdav1d --enable-libbluray --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopus --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libtheora --enable-libtwolame --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-lzma --enable-zlib --enable-gmp --enable-libvidstab --enable-libvorbis --enable-libvo-amrwbenc --enable-libmysofa --enable-libspeex --enable-libxvid --enable-libaom --enable-libmfx --enable-avisynth --enable-libopenmpt --enable-shared --disable-static
```
This step checks for the necessary dependencies and configures the build based on your system.
3.4. Build FFmpeg:
```bash
make -j$(nproc)
```
This step may take some time as it compiles the FFmpeg source code.
3.5. Install FFmpeg:
```bash
sudo make install
```
3.6. Verify Installation:
```bash
ffmpeg -version
```
This should display the version information, and you should see version 4.2.3.
Optional: Update Library Cache:
```bash
sudo ldconfig
```
This updates the dynamic linker run-time bindings.
That's it! You should now have FFmpeg version 4.2.3 installed on your Debian 11 system.
If you still encounter the "libavdevice.so.58" error when running ffmpeg, run the following commands to fix it (remember to adjust the paths for your system):
```bash
echo 'export LD_LIBRARY_PATH=/usr/local/lib:/usr/local/lib64/:/usr/local/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH' >> ~/.bashrc
source ~/.bashrc
```
### 4. Upgrade openai-whisper:
```bash
pip3.10 install -U openai-whisper
```
================================================
FILE: requirements.txt
================================================
python-dotenv
gradio_client==1.5.4
gradio==5.12.0
openai==1.37.0
httpx==0.27.2
tiktoken
tinydb
tinymongo
proglog
yt-dlp>=2025.1.12
torch
torchaudio
### whisper timestamped
whisper-timestamped
protobuf==3.20.3
pillow==10.4.0
moviepy==2.1.2
progress
questionary
edge-tts
================================================
FILE: runShortGPT.py
================================================
# Launcher script: builds the ShortGPT UI with colab=False and starts it.
from gui.gui_gradio import ShortGptUI
app = ShortGptUI(colab=False)
# Runs at import time; this module has no __main__ guard.
app.launch()
================================================
FILE: runShortGPTColab.py
================================================
# Launcher script for Colab: builds the ShortGPT UI with colab=True and starts it.
from gui.gui_gradio import ShortGptUI
app = ShortGptUI(colab=True)
# Runs at import time; this module has no __main__ guard.
app.launch()
================================================
FILE: setup.py
================================================
from setuptools import setup, find_packages
import codecs
import os
# Packaging script for the `shortgpt` distribution.
here = os.path.abspath(os.path.dirname(__file__))
# The README, read relative to this file, becomes the long description.
with codecs.open(os.path.join(here, "README.md"), encoding="utf-8") as fh:
long_description = "\n" + fh.read()
VERSION = '0.1.31'
DESCRIPTION = 'Automating video and short content creation with AI'
# NOTE(review): LONG_DESCRIPTION is never passed to setup(); the README text
# in `long_description` is used instead.
LONG_DESCRIPTION = 'A powerful tool for automating content creation. It simplifies video creation, footage sourcing, voiceover synthesis, and editing tasks.'
setup(
name="shortgpt",
version=VERSION,
author="RayVentura",
author_email="",
description=DESCRIPTION,
long_description_content_type="text/markdown",
long_description=long_description,
packages=find_packages(),
package_data={'': ['*.yaml', '*.json']},  # ship *.yaml and *.json data files found in any package
# NOTE(review): requirements.txt pins openai==1.37.0 while this list pins
# 1.37.2, and requirements.txt lists packages (e.g. gradio) that are absent
# here — confirm which set is authoritative.
install_requires=[
'python-dotenv',
"openai==1.37.2",
'tiktoken',
'tinydb',
'tinymongo',
'proglog',
'yt-dlp',
'torch',
'whisper-timestamped',
'torchaudio',
'pillow==10.4.0',
'edge-tts',
'moviepy==2.1.2',
'progress',
'questionary',
],
keywords=['python', 'video', 'content creation', 'AI', 'automation', 'editing', 'voiceover synthesis', 'video captions', 'asset sourcing', 'tinyDB'],
classifiers=[
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Developers",
"Programming Language :: Python :: 3",
"Operating System :: Unix",
"Operating System :: MacOS :: MacOS X",
"Operating System :: Microsoft :: Windows",
]
)
================================================
FILE: shortGPT/__init__.py
================================================
# import time
# t1 = time.time()
# from . import config
# print("Took", time.time() - t1, "seconds to import config")
# t1 = time.time()
# from . import editing
# print("Took", time.time() - t1, "seconds to import editing")
# t1 = time.time()
# from . import audio
# print("Took", time.time() - t1, "seconds to import audio")
# t1 = time.time()
# from . import engine
# print("Took", time.time() - t1, "seconds to import engine")
# t1 = time.time()
# from . import database
# print("Took", time.time() - t1, "seconds to import database")
# t1 = time.time()
# from . import gpt
# print("Took", time.time() - t1, "seconds to import gpt")
# t1 = time.time()
# from . import tracking
# print("Took", time.time() - t1, "seconds to import tracking")
# from . import config
# from . import database
# from . import editing_functions
# from . import audio
# from . import engine
# from . import gpt
# from . import tracking
================================================
FILE: shortGPT/api_utils/README.md
================================================
# Module: api_utils
The `api_utils` module provides utility functions for working with different APIs. It includes three files: `image_api.py`, `pexels_api.py`, and `eleven_api.py`. Each file contains functions related to a specific API.
## File: image_api.py
This file contains functions for interacting with the Bing Images API and extracting image URLs from the HTML response.
### Functions:
#### `_extractBingImages(html)`
This function takes an HTML response as input and extracts image URLs, widths, and heights from it. It uses regular expressions to find the necessary information. The extracted image URLs are returned as a list of dictionaries, where each dictionary contains the URL, width, and height of an image.
#### `_extractGoogleImages(html)`
This function takes an HTML response as input and extracts image URLs from it. It uses regular expressions to find the necessary information. The extracted image URLs are returned as a list.
#### `getBingImages(query, retries=5)`
This function takes a query string as input and retrieves a list of image URLs from the Bing Images API. It replaces spaces in the query string with `+` and sends a GET request to the API. If the request is successful (status code 200), the HTML response is passed to `_extractBingImages` to extract the image URLs. If the request fails or no images are found, an exception is raised.
## File: pexels_api.py
This file contains functions for interacting with the Pexels Videos API and retrieving video URLs based on a query string.
### Functions:
#### `search_videos(query_string, orientation_landscape=True)`
This function takes a query string and an optional boolean parameter `orientation_landscape` as input. It sends a GET request to the Pexels Videos API to search for videos based on the query string. The orientation of the videos can be specified as landscape or portrait. The function returns the JSON response from the API.
#### `getBestVideo(query_string, orientation_landscape=True, used_vids=[])`
This function takes a query string, an optional boolean parameter `orientation_landscape`, and an optional list `used_vids` as input. It calls the `search_videos` function to retrieve a list of videos based on the query string. It then filters and sorts the videos based on their dimensions and duration, and returns the URL of the best matching video. The `used_vids` parameter can be used to exclude previously used videos from the search results.
## File: eleven_api.py
This file contains functions for interacting with the Eleven API and generating voice recordings based on text input.
### Functions:
#### `ElevenLabsAPI.get_voices()`
This method retrieves the available voices from the Eleven API, sending the API key the `ElevenLabsAPI` instance was created with when one is set. The voices are stored on the instance and returned as a dictionary, where the keys are voice names and the values are voice IDs.
#### `ElevenLabsAPI.get_remaining_characters()`
This method retrieves the number of characters remaining for the `ElevenLabsAPI` instance's API key. It sends a GET request to the Eleven API and returns the character limit minus the character count from the response. If the request fails, an exception is raised with the API's error message.
#### `ElevenLabsAPI.generate_voice(text, character, filename, stability=0.2, clarity=0.1)`
This method takes a text input, a character name, a file name, and the optional parameters `stability` and `clarity`. It generates a voice recording using the Eleven API and saves it to the specified file. The character name is used to select the appropriate voice. The stability and clarity parameters control the quality of the voice recording. The API key given to the `ElevenLabsAPI` instance is used for authentication. If the request is successful, the file name is returned. Otherwise, an exception is raised with the response status code and message.
================================================
FILE: shortGPT/api_utils/__init__.py
================================================
from . import image_api
from . import eleven_api
================================================
FILE: shortGPT/api_utils/eleven_api.py
================================================
import json
import requests
class ElevenLabsAPI:
    """Small client for the ElevenLabs REST API: voice list, character quota, text-to-speech."""

    def __init__(self, api_key):
        """Store the API key and fetch the voice list (one HTTP request)."""
        self.api_key = api_key
        self.url_base = 'https://api.elevenlabs.io/v1/'
        # Populates self.voices, which generate_voice() needs.
        self.get_voices()

    def get_voices(self):
        '''Get the list of voices available.

        Returns:
            Dict mapping voice name to voice id; also stored as ``self.voices``.

        Raises:
            Exception: If the API does not answer with status 200.
        '''
        url = self.url_base + 'voices'
        headers = {'accept': 'application/json'}
        # The key header is only sent when a key was provided.
        if self.api_key:
            headers['xi-api-key'] = self.api_key
        response = requests.get(url, headers=headers)
        # Fail with the API's own message instead of an opaque lookup error
        # on the parsed body, in line with the other methods of this class.
        if response.status_code != 200:
            raise Exception(f'Error in response, {response.status_code} , message: {response.text}')
        self.voices = {voice['name']: voice['voice_id'] for voice in response.json()['voices']}
        return self.voices

    def get_remaining_characters(self):
        '''Get the number of characters remaining.

        Raises:
            Exception: With the API's error message if the status is not 200.
        '''
        url = self.url_base + 'user'
        headers = {'accept': '*/*', 'xi-api-key': self.api_key, 'Content-Type': 'application/json'}
        response = requests.get(url, headers=headers)
        if response.status_code == 200:
            sub = response.json()['subscription']
            return sub['character_limit'] - sub['character_count']
        else:
            raise Exception(response.json()['detail']['message'])

    def generate_voice(self, text, character, filename, stability=0.2, clarity=0.1):
        '''Generate speech for ``text`` with voice ``character`` and write it to ``filename``.

        Returns:
            ``filename`` on success.

        Raises:
            KeyError: If ``character`` is not a known voice name; the message
                lists the available names.
            Exception: If the API does not answer with status 200.
        '''
        if character not in self.voices:
            # Previously this only printed and then failed with a bare KeyError.
            raise KeyError(f"{character} is not in the array of characters: {list(self.voices.keys())}")
        voice_id = self.voices[character]
        url = f'{self.url_base}text-to-speech/{voice_id}/stream'
        headers = {'accept': '*/*', 'xi-api-key': self.api_key, 'Content-Type': 'application/json'}
        # NOTE(review): stability / similarity_boost are sent as top-level keys;
        # confirm against the ElevenLabs API reference whether they belong
        # under "voice_settings".
        data = json.dumps({"model_id": "eleven_multilingual_v2", "text": text, "stability": stability, "similarity_boost": clarity})
        response = requests.post(url, headers=headers, data=data)
        if response.status_code == 200:
            with open(filename, 'wb') as f:
                f.write(response.content)
            return filename
        else:
            message = response.text
            raise Exception(f'Error in response, {response.status_code} , message: {message}')
================================================
FILE: shortGPT/api_utils/image_api.py
================================================
import json
import requests
import re
import urllib.parse
from urllib3 import Retry
def _extractBingImages(html):
    """Pull image entries out of a Bing image-search HTML page.

    Returns:
        A list of ``{'url', 'width', 'height'}`` dicts, one per match whose
        still-encoded URL ends in ``.jpg``, ``.png`` or ``.jpeg``. URLs are
        percent-decoded; width and height are ints.
    """
    pattern = r'mediaurl=(.*?)&.*?expw=(\d+).*?exph=(\d+)'
    return [
        {'url': urllib.parse.unquote(raw_url), 'width': int(raw_width), 'height': int(raw_height)}
        for raw_url, raw_width, raw_height in re.findall(pattern, html)
        if raw_url.endswith(('.jpg', '.png', '.jpeg'))
    ]
def _extractGoogleImages(html):
    """Pull image URLs out of a Google image-search HTML page.

    The page embeds its data as JSON inside an ``AF_initDataCallback`` call;
    URLs are read from a fixed position in that structure.

    Returns:
        A list of URLs. It is empty when the callback is not found. Entries
        that do not have the expected shape are skipped.
    """
    images = []
    regex = re.compile(r"AF_initDataCallback\({key: 'ds:1', hash: '2', data:(.*?), sideChannel: {}}\);")
    match = regex.search(html)
    if match:
        dz = json.loads(match.group(1))
        for c in dz[56][1][0][0][1][0]:
            try:
                thing = list(c[0][0].values())[0]
                images.append(thing[1][3])
            # Best effort: skip malformed entries, but no longer swallow
            # unrelated exceptions such as KeyboardInterrupt.
            except (IndexError, KeyError, TypeError, AttributeError):
                continue
    return images
import urllib.parse
from requests.adapters import HTTPAdapter
def getBingImages(query, retries=5):
    """Search Bing Images for ``query`` and return the extracted image entries.

    Args:
        query: Free-text search query.
        retries: Maximum number of attempts. It is also used as the retry
            budget of the HTTP adapter for 5xx responses.

    Returns:
        The non-empty list produced by ``_extractBingImages``.

    Raises:
        Exception: On a non-200 response, when SSL errors persist through the
            last attempt, or when no attempt yields any image.
    """
    # URL-encode the whole query (spaces become '+') so that characters such
    # as '&' or '#' cannot break the request URL.
    query = urllib.parse.quote_plus(query)

    # Create a session with custom retry strategy
    session = requests.Session()
    retry_strategy = Retry(
        total=retries,
        backoff_factor=1,
        status_forcelist=[500, 502, 503, 504]
    )
    adapter = HTTPAdapter(max_retries=retry_strategy)
    session.mount("https://", adapter)

    # Every attempt counts toward the limit. Previously only SSL errors
    # advanced the counter, so a 200 response with no extractable images
    # looped forever.
    for attempt in range(1, retries + 1):
        try:
            # NOTE(review): verify=False disables TLS certificate verification
            # (kept from the original; use with caution).
            response = session.get(
                f"https://www.bing.com/images/search?q={query}&first=1",
                verify=False,
                headers={
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
                }
            )
        except requests.exceptions.SSLError as e:
            print(f"SSL Error occurred (attempt {attempt}/{retries}): {str(e)}")
            if attempt >= retries:
                raise Exception("Max retries reached - SSL Error while making Bing image searches") from e
            continue
        if response.status_code != 200:
            print("Error While making bing image searches", response.text)
            raise Exception("Error While making bing image searches")
        images = _extractBingImages(response.text)
        if images:
            return images
    raise Exception("Error While making bing image searches")
================================================
FILE: shortGPT/api_utils/pexels_api.py
================================================
import requests
from shortGPT.config.api_db import ApiKeyManager
def search_videos(query_string, orientation_landscape=True):
    """Query the Pexels video search endpoint and return the decoded JSON body.

    Args:
        query_string: Search terms.
        orientation_landscape: Request "landscape" results when true,
            "portrait" otherwise.
    """
    auth_headers = {"Authorization": ApiKeyManager.get_api_key("PEXELS_API_KEY")}
    orientation = "landscape" if orientation_landscape else "portrait"
    search_params = {"query": query_string, "orientation": orientation, "per_page": 15}
    # presumably rate-limit details are exposed via X-Ratelimit-* response
    # headers (earlier debug prints read them) — TODO confirm
    response = requests.get("https://api.pexels.com/videos/search", headers=auth_headers, params=search_params)
    return response.json()
def getBestVideo(query_string, orientation_landscape=True, used_vids=[]):
    """Pick one Full-HD Pexels video link for ``query_string``.

    Search results are narrowed to 16:9 videos at least 1920x1080 (landscape)
    or 1080x1920 (portrait), ordered by how close their duration is to 15
    seconds, and the first exact-resolution file whose link prefix (text
    before ``.hd``) is not in ``used_vids`` is returned.

    Args:
        query_string: Search terms forwarded to ``search_videos``.
        orientation_landscape: ``True`` for 1920x1080, ``False`` for 1080x1920.
        used_vids: Link prefixes to skip. Only read, never modified.

    Returns:
        The link of the selected video file, or ``None`` when nothing matches.
    """
    search_result = search_videos(query_string, orientation_landscape)
    candidates = search_result['videos']

    if orientation_landscape:
        target_width, target_height = 1920, 1080
    else:
        target_width, target_height = 1080, 1920

    def _is_full_hd_16_9(video):
        # Large enough in both dimensions, with an exact 16:9 aspect ratio.
        if not (video['width'] >= target_width and video['height'] >= target_height):
            return False
        if orientation_landscape:
            return video['width'] / video['height'] == 16 / 9
        return video['height'] / video['width'] == 16 / 9

    eligible = [video for video in candidates if _is_full_hd_16_9(video)]
    # Videos whose duration is closest to 15 seconds come first.
    ranked = sorted(eligible, key=lambda video: abs(15 - int(video['duration'])))

    for video in ranked:
        for rendition in video['video_files']:
            exact_size = rendition['width'] == target_width and rendition['height'] == target_height
            if exact_size and rendition['link'].split('.hd')[0] not in used_vids:
                return rendition['link']

    print("NO LINKS found for this round of search with query :", query_string)
    return None
================================================
FILE: shortGPT/audio/README.md
================================================
# Audio Module
The audio module provides a set of functions and classes for working with audio files and performing various operations on them.
## audio_utils.py
This file contains utility functions for audio processing.
### downloadYoutubeAudio(url, outputFile)
Downloads audio from a YouTube video given its URL and saves it to the specified output file. Returns the path to the downloaded audio file and its duration.
### speedUpAudio(tempAudioPath, outputFile, expected_duration=None)
Changes the tempo of the audio with ffmpeg. When `expected_duration` is not given, audio longer than 57 seconds is sped up so that it lasts 57 seconds, and shorter audio is re-encoded without a tempo change. When `expected_duration` is given, the tempo is scaled so that the audio lasts that many seconds. Returns the path to the output file if it was created.
### ChunkForAudio(alltext, chunk_size=2500)
Splits a text into chunks of a specified size (default is 2500 characters) to be used for audio generation. Returns a list of text chunks.
### audioToText(filename, model_size="base")
Converts an audio file to text using a pre-trained Whisper model, which is loaded on first use and then reused. Returns the transcription result, which contains the transcribed text and its timestamped segments.
### getWordsPerSec(filename)
Calculates the average number of words per second in an audio file. Returns the words per second value.
### getCharactersPerSec(filename)
Calculates the average number of characters per second in an audio file. Returns the characters per second value.
## audio_duration.py
This file contains functions for getting the duration of audio files.
### get_duration_yt_dlp(url)
Gets the duration of a YouTube video or audio using the yt_dlp library. Returns the duration in seconds.
### get_duration_ffprobe(signed_url)
Gets the duration of an audio or video file using the ffprobe command line tool. Returns the duration in seconds.
### get_asset_duration(url, isVideo=True)
Gets the duration of an audio or video asset from various sources, including YouTube and cloud storage providers. Returns the URL of the asset and its duration in seconds.
### getYoutubeAudioLink(url)
Gets the audio link of a YouTube video given its URL. Returns the audio URL and its duration in seconds.
### getYoutubeVideoLink(url)
Gets the video link of a YouTube video given its URL. Returns the video URL and its duration in seconds.
## voice_module.py
This file contains an abstract base class for voice modules.
### VoiceModule
An abstract base class that defines the interface for voice modules. Voice modules are responsible for generating voice recordings from text.
#### update_usage()
Updates the usage statistics of the voice module.
#### get_remaining_characters()
Gets the number of remaining characters that can be generated using the voice module.
#### generate_voice(text, outputfile)
Generates a voice recording from the specified text and saves it to the specified output file.
## eleven_voice_module.py
This file contains a voice module implementation for the ElevenLabs API.
### ElevenLabsVoiceModule
A voice module implementation for the ElevenLabs API. Requires an API key and a voice name to be initialized.
#### update_usage()
Updates the usage statistics of the ElevenLabs API.
#### get_remaining_characters()
Gets the number of remaining characters that can be generated using the ElevenLabs API.
#### generate_voice(text, outputfile)
Generates a voice recording from the specified text using the ElevenLabs API and saves it to the specified output file. Raises an exception if the API key does not have enough credits to generate the text.
================================================
FILE: shortGPT/audio/__init__.py
================================================
from . import audio_utils
from . import eleven_voice_module
from . import audio_duration
================================================
FILE: shortGPT/audio/audio_duration.py
================================================
import json
import subprocess
import yt_dlp
from shortGPT.editing_utils.handle_videos import getYoutubeVideoLink
def get_duration_yt_dlp(url):
    """Return the ``duration`` reported by yt_dlp for ``url`` without downloading.

    Args:
        url: Video/audio url or path handed to ``YoutubeDL.extract_info``.

    Returns:
        The ``duration`` entry of the extracted metadata.

    Raises:
        Exception: If extraction fails or the metadata has no ``duration``
            entry. The original error is chained as the cause.
    """
    ydl_opts = {
        "quiet": True,
        "no_warnings": True,
        "no_color": True,
        "no_call_home": True,
        "no_check_certificate": True
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            dictMeta = ydl.extract_info(url, download=False)
            return dictMeta['duration']
    except Exception as e:
        # Format the exception itself: e.args may be empty, and indexing it
        # would replace the real error with an IndexError.
        raise Exception(f"Failed getting duration from the following video/audio url/path using yt_dlp. {url} {e}") from e
def get_duration_ffprobe(signed_url):
    """Read the duration of a media url/path with the ``ffprobe`` command.

    Args:
        signed_url: Url or path passed to ffprobe via ``-i``.

    Returns:
        A ``(duration, error)`` tuple. On success ``duration`` is a float
        number of seconds and ``error`` is ``""``. When ffprobe exits with a
        non-zero code, ``duration`` is ``None`` and ``error`` carries its
        stderr. On any other failure (ffprobe missing, unparsable output, no
        duration field) the error is printed and ``(None, "")`` is returned.
    """
    cmd = [
        "ffprobe",
        "-v",
        "quiet",
        "-print_format",
        "json",
        "-show_format",
        "-i",
        signed_url
    ]
    try:
        output = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        if output.returncode != 0:
            return None, f"Error executing command using ffprobe. {output.stderr.strip()}"
        metadata = json.loads(output.stdout)
        duration = float(metadata["format"]["duration"])
        return duration, ""
    except Exception as e:
        # Print the exception itself: e.args[0] is only the errno for OSError
        # and raises IndexError when args is empty, which would crash this
        # best-effort handler.
        print("Failed getting the duration of the asked ressource", e)
        return None, ""
def get_asset_duration(url, isVideo=True):
    """Resolve a media url/path and return it together with its duration.

    Urls containing ``youtube.com`` are first resolved to a direct stream
    link: an audio link when ``isVideo`` is false, a video link otherwise.
    The duration is then read with ffprobe, falling back to yt_dlp.

    Args:
        url: Url or path of the asset.
        isVideo: Selects the video or audio stream for YouTube urls.

    Returns:
        A ``(url, duration)`` tuple, where ``url`` is the possibly resolved
        link and ``duration`` is in seconds.

    Raises:
        Exception: If the YouTube audio link cannot be resolved or if neither
            ffprobe nor yt_dlp yields a duration.
    """
    if "youtube.com" in url:
        if not isVideo:
            audio_link = getYoutubeAudioLink(url)
            # getYoutubeAudioLink returns None on failure. Unpacking it would
            # raise an opaque TypeError, so fail with a clear message.
            if audio_link is None:
                raise Exception(f"Failed getting the audio link of the YouTube url {url}")
            url, _ = audio_link
        else:
            url, _ = getYoutubeVideoLink(url)

    # Two methods are tried in turn: ffprobe first, then yt_dlp.
    duration, err_ffprobe = get_duration_ffprobe(url)
    if duration is not None:
        return url, duration

    try:
        duration = get_duration_yt_dlp(url)
    except Exception:
        # Print the ffprobe diagnostic before the yt_dlp error propagates;
        # otherwise it would be lost.
        if err_ffprobe:
            print(err_ffprobe)
        raise
    if duration is not None:
        return url, duration

    print(err_ffprobe)
    raise Exception(f"The url/path {url} does not point to a video/ audio. Impossible to extract its duration")
def getYoutubeAudioLink(url):
    """Resolve the direct audio stream url of a video using yt_dlp.

    Args:
        url: Url handed to ``YoutubeDL.extract_info`` (nothing is downloaded).

    Returns:
        A ``(audio_url, duration)`` tuple taken from the extracted metadata,
        or ``None`` when extraction fails (the error is printed).
    """
    ydl_opts = {
        "quiet": True,
        "no_warnings": True,
        "no_color": True,
        "no_call_home": True,
        "no_check_certificate": True,
        "format": "bestaudio/best"
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            dictMeta = ydl.extract_info(url, download=False)
            return dictMeta['url'], dictMeta['duration']
    except Exception as e:
        # Print the exception itself: indexing e.args can raise IndexError
        # and escape this best-effort handler.
        print("Failed getting audio link from the following video/url", e)
    return None
================================================
FILE: shortGPT/audio/audio_utils.py
================================================
import os
import subprocess
import time
import yt_dlp
from shortGPT.audio.audio_duration import get_asset_duration
# Average number of characters per second of speech, obtained by transcribing
# many shorts with Whisper and averaging the result.
CONST_CHARS_PER_SEC = 20.5
# Cache for the Whisper model: audioToText loads it on first use and reuses it.
WHISPER_MODEL = None
def downloadYoutubeAudio(url, outputFile):
    """Download the best audio stream of ``url`` to ``outputFile`` via yt_dlp.

    Up to four attempts are made, sleeping one second between failures.

    Args:
        url: Video url to extract the audio from.
        outputFile: Output template/path given to yt_dlp.

    Returns:
        A ``(outputFile, duration)`` tuple on success.

    Raises:
        Exception: After the fourth failed attempt, carrying the failure
            message and the first argument of the last error.
    """
    max_attempts = 4
    download_options = {
        "quiet": True,
        "no_warnings": True,
        "no_color": True,
        "no_call_home": True,
        "no_check_certificate": True,
        "format": "bestaudio/best",
        "outtmpl": outputFile
    }
    for attempt in range(1, max_attempts + 1):
        try:
            with yt_dlp.YoutubeDL(download_options) as downloader:
                info = downloader.extract_info(url, download=True)
                if not os.path.exists(outputFile):
                    raise Exception("Audio Download Failed")
                return outputFile, info['duration']
        except Exception as e:
            if attempt == max_attempts:
                raise Exception(f"Failed downloading audio from the following video/url for url {url}", e.args[0])
            time.sleep(1)
    return None
def speedUpAudio(tempAudioPath, outputFile, expected_duration=None):
    """Re-encode an audio file with ffmpeg, adjusting its tempo when needed.

    Without ``expected_duration``, audio longer than 57 seconds is sped up to
    last 57 seconds and shorter audio is converted without a tempo change.
    With ``expected_duration``, the tempo is scaled by
    ``duration / expected_duration``.

    Args:
        tempAudioPath: Input audio url/path (resolved by ``get_asset_duration``).
        outputFile: Path ffmpeg writes to.
        expected_duration: Target duration in seconds; a falsy value selects
            the 57-second rule.

    Returns:
        ``outputFile`` if it exists after ffmpeg ran, otherwise ``None``.
    """
    tempAudioPath, duration = get_asset_duration(tempAudioPath, False)

    ffmpeg_cmd = ['ffmpeg', '-loglevel', 'error', '-i', tempAudioPath]
    if expected_duration:
        ffmpeg_cmd += ['-af', f'atempo={(duration/expected_duration):.5f}']
    elif duration > 57:
        ffmpeg_cmd += ['-af', f'atempo={(duration/57):.5f}']
    ffmpeg_cmd.append(outputFile)
    subprocess.run(ffmpeg_cmd)

    return outputFile if os.path.exists(outputFile) else None
def ChunkForAudio(alltext, chunk_size=2500):
    """Split text into sentence-aligned chunks for audio generation.

    The text is split on ``.`` and sentences are packed greedily into chunks.
    A sentence is added to the current chunk while the chunk's length plus
    the sentence's length stays within ``chunk_size``. Every sentence is
    written back with a trailing ``.``, so a single sentence longer than
    ``chunk_size`` becomes its own oversized chunk.

    Args:
        alltext: Text to split.
        chunk_size: Soft limit on the chunk length, in characters.

    Returns:
        A list of non-empty text chunks; an empty list for empty input.
    """
    sentences = alltext.split('.')
    # Text ending with "." yields a trailing empty piece. Re-appending "."
    # to it would double the final period, so drop it.
    if sentences[-1] == '':
        sentences.pop()

    chunks = []
    curr_chunk = ''
    for sentence in sentences:
        if len(curr_chunk) + len(sentence) <= chunk_size:
            curr_chunk += sentence + '.'
        else:
            # Never emit an empty chunk when the very first sentence is
            # already larger than chunk_size.
            if curr_chunk:
                chunks.append(curr_chunk)
            curr_chunk = sentence + '.'
    if curr_chunk:
        chunks.append(curr_chunk)
    return chunks
def audioToText(filename, model_size="base"):
    """Transcribe an audio file with whisper_timestamped.

    The model is loaded on the first call and cached in the module-level
    ``WHISPER_MODEL``.

    NOTE(review): because of that cache, ``model_size`` only has an effect on
    the call that loads the model; later calls reuse it whatever size they
    ask for.

    Args:
        filename: Path of the audio file to transcribe.
        model_size: Whisper model name used when the model is first loaded.

    Returns:
        The result of ``transcribe_timestamped`` for the file.
    """
    # Imported lazily so the module can be imported without loading whisper.
    from whisper_timestamped import load_model, transcribe_timestamped
    global WHISPER_MODEL
    if WHISPER_MODEL is None:
        WHISPER_MODEL = load_model(model_size)
    return transcribe_timestamped(WHISPER_MODEL, filename, verbose=False, fp16=False)
def getWordsPerSec(filename):
    """Return the number of transcribed words per second of ``filename``.

    The word count of the transcription text is divided by the end time of
    its last segment.
    """
    transcription = audioToText(filename)
    word_count = len(transcription['text'].split())
    speech_end = transcription['segments'][-1]['end']
    return word_count / speech_end
def getCharactersPerSec(filename):
    """Return the number of transcribed characters per second of ``filename``.

    The length of the transcription text is divided by the end time of its
    last segment.
    """
    transcription = audioToText(filename)
    char_count = len(transcription['text'])
    speech_end = transcription['segments'][-1]['end']
    return char_count / speech_end
def run_background_audio_split(sound_file_path):
    """Separate a sound file with spleeter and return the accompaniment path.

    Runs ``spleeter separate -p spleeter:2stems`` with the sound file's own
    directory as the output directory.

    Args:
        sound_file_path: Path of the audio file to split.

    Returns:
        The path ``<output_dir>/<file stem>/accompaniment.wav`` when spleeter
        exits successfully, or ``None`` if it fails or cannot be run.
    """
    try:
        output_dir = os.path.dirname(sound_file_path)
        # An argument list (no shell) keeps paths containing quotes, spaces
        # or shell metacharacters from being interpreted by a shell.
        command = [
            "spleeter", "separate",
            "-p", "spleeter:2stems",
            "-o", output_dir,
            sound_file_path,
        ]
        subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # The stem is the file name without its last extension. This handles
        # names with several dots and OS-specific separators.
        # presumably spleeter names its output folder the same way — TODO confirm
        track_name = os.path.splitext(os.path.basename(sound_file_path))[0]
        return os.path.join(output_dir, track_name, "accompaniment.wav")
    except Exception:
        # If spleeter crashes or is not installed, return None
        return None
================================================
FILE: shortGPT/audio/edge_voice_module.py
================================================
import asyncio
import os
from concurrent.futures import ThreadPoolExecutor
import edge_tts
from shortGPT.audio.voice_module import VoiceModule
from shortGPT.config.languages import (EDGE_TTS_VOICENAME_MAPPING,
LANGUAGE_ACRONYM_MAPPING, Language)
def run_async_func(loop, func):
    """Run the awaitable ``func`` on ``loop`` until done and return its result."""
    outcome = loop.run_until_complete(func)
    return outcome
class EdgeTTSVoiceModule(VoiceModule):
    """Voice module that synthesizes speech with edge_tts."""

    def __init__(self, voiceName):
        """Store the edge_tts voice name used for every generation."""
        self.voiceName = voiceName
        super().__init__()

    def update_usage(self):
        """No usage is tracked for this module; always returns ``None``."""
        return None

    def get_remaining_characters(self):
        """Return a very large constant; no character quota is tracked here."""
        return 999999999999

    def generate_voice(self, text, outputfile):
        """Synthesize ``text`` into ``outputfile`` and return its path.

        Args:
            text: Text to speak.
            outputfile: Path of the audio file to write.

        Returns:
            ``outputfile``.

        Raises:
            Exception: If generation fails or no output file was produced.
        """
        # The coroutine runs on a worker thread with its own event loop.
        # presumably so this also works when the calling thread already has
        # a running loop — TODO confirm against callers.
        # .result() waits for completion and re-raises any generation error.
        # Without it a failure is swallowed and a partially written file
        # could be returned as if generation had succeeded.
        with ThreadPoolExecutor(max_workers=1) as executor:
            executor.submit(asyncio.run, self.async_generate_voice(text, outputfile)).result()
        if not os.path.exists(outputfile):
            print("An error happened during edge_tts audio generation, no output audio generated")
            raise Exception("An error happened during edge_tts audio generation, no output audio generated")
        return outputfile

    async def async_generate_voice(self, text, outputfile):
        """Stream edge_tts audio chunks for ``text`` into ``outputfile``.

        Returns:
            ``outputfile``.

        Raises:
            Exception: Wrapping any error raised while streaming or writing.
        """
        try:
            communicate = edge_tts.Communicate(text, self.voiceName)
            with open(outputfile, "wb") as file:
                async for chunk in communicate.stream():
                    # Only chunks of type "audio" are written to the file.
                    if chunk["type"] == "audio":
                        file.write(chunk["data"])
        except Exception as e:
            print("Error generating audio using edge_tts", e)
            raise Exception("An error happened during edge_tts audio generation, no output audio generated", e)
        return outputfile
================================================
FILE: shortGPT/audio/eleven_voice_module.py
================================================
from shortGPT.api_utils.eleven_api import ElevenLabsAPI
from shortGPT.audio.voice_module import VoiceModule
class ElevenLabsVoiceModule(VoiceModule):
    """Voice module backed by the ElevenLabs API wrapper."""

    def __init__(self, api_key, voiceName, checkElevenCredits=False):
        """Create the API client and fetch the remaining character balance.

        Args:
            api_key: ElevenLabs API key.
            voiceName: Voice passed as ``character`` when generating audio.
            checkElevenCredits: When true, fail if fewer than 1200 characters
                remain.

        Raises:
            Exception: If ``checkElevenCredits`` is set and the balance is
                below 1200 characters.
        """
        self.api_key = api_key
        self.voiceName = voiceName
        self.remaining_credits = None
        self.eleven_labs_api = ElevenLabsAPI(self.api_key)
        self.update_usage()
        if checkElevenCredits:
            if self.get_remaining_characters() < 1200:
                raise Exception(f"Your ElevenLabs API KEY doesn't have enough credits ({self.remaining_credits} character remaining). Minimum required: 1200 characters (equivalent to a 45sec short)")
        super().__init__()

    def update_usage(self):
        """Refresh and return the cached remaining character balance."""
        balance = self.eleven_labs_api.get_remaining_characters()
        self.remaining_credits = balance
        return balance

    def get_remaining_characters(self):
        """Return the cached balance when truthy, else ask the API again."""
        if self.remaining_credits:
            return self.remaining_credits
        return self.eleven_labs_api.get_remaining_characters()

    def generate_voice(self, text, outputfile):
        """Generate audio for ``text`` and refresh the cached balance.

        Returns:
            The file path returned by the API wrapper.

        Raises:
            Exception: If ``text`` is longer than the remaining balance.
        """
        if not self.get_remaining_characters() >= len(text):
            raise Exception(f"You cannot generate {len(text)} characters as your ElevenLabs key has only {self.remaining_credits} characters remaining")
        generated_path = self.eleven_labs_api.generate_voice(text=text, character=self.voiceName, filename=outputfile)
        self.update_usage()
        return generated_path
================================================
FILE: shortGPT/audio/voice_module.py
================================================
from abc import ABC, abstractmethod
class VoiceModule(ABC):
    """Abstract interface that every voice module must implement."""

    def __init__(self):
        """Nothing to initialise in the base class."""

    @abstractmethod
    def update_usage(self):
        """Update the module's usage information."""

    @abstractmethod
    def get_remaining_characters(self):
        """Return how many characters can still be generated."""

    @abstractmethod
    def generate_voice(self, text, outputfile):
        """Generate a voice recording of ``text`` into ``outputfile``."""
================================================
FILE: shortGPT/config/README.md
================================================
# Module: config
The `config` module contains various files and functions related to configuration settings and utilities.
## File: config.py
This file contains functions for reading and writing YAML files, as well as loading local assets specified in a YAML configuration file.
### Functions:
#### `read_yaml_config(file_path: str) -> dict`
This function reads and returns the contents of a YAML file as a dictionary.
Parameters:
- `file_path` - The path to the YAML file to be read.
Returns:
- A dictionary containing the contents of the YAML file.
#### `write_yaml_config(file_path: str, data: dict)`
This function writes a dictionary to a YAML file.
Parameters:
- `file_path` - The path to the YAML file to be written.
- `data` - The dictionary to be written to the YAML file.
#### `load_editing_assets() -> dict`
This function loads all local assets from the static-assets folder specified in the yaml_config.
Returns:
- A dictionary containing the YAML configuration with updated local assets.
## File: asset_db.py
This file contains a class `AssetDatabase` that provides methods for managing a database of assets.
### Class: AssetDatabase
This class represents a database of assets and provides methods for adding, removing, and retrieving assets.
Methods:
#### `__init__()`
This method initializes the `AssetDatabase` object. It creates the local and remote asset collections if they don't already exist.
#### `asset_exists(name)`
This method checks if an asset with the given name exists in the database.
Parameters:
- `name` - The name of the asset.
Returns:
- `True` if the asset exists, `False` otherwise.
#### `add_local_asset(name, type, path)`
This method adds a local asset to the database.
Parameters:
- `name` - The name of the asset.
- `type` - The type of the asset.
- `path` - The path to the asset file.
#### `add_remote_asset(name, type, url)`
This method adds a remote asset to the database.
Parameters:
- `name` - The name of the asset.
- `type` - The type of the asset.
- `url` - The URL of the remote asset.
#### `remove_asset(name)`
This method removes an asset from the database.
Parameters:
- `name` - The name of the asset.
#### `get_df()`
This method returns a pandas DataFrame with specific asset details.
Returns:
- A pandas DataFrame containing the asset details.
#### `sync_local_assets()`
This method adds every file found under the `public` folder that is not already registered as a local asset to the database.
#### `get_asset_link(key)`
This method returns the link or path of an asset with the given key.
Parameters:
- `key` - The key of the asset.
Returns:
- The link or path of the asset.
#### `get_asset_duration(key)`
This method returns the duration of an asset with the given key.
Parameters:
- `key` - The key of the asset.
Returns:
- The duration of the asset.
#### `updateLocalAsset(key: str)`
This method updates the local asset with the given key.
Parameters:
- `key` - The key of the asset.
Returns:
- The file path and duration of the updated asset.
#### `updateYoutubeAsset(key: str)`
This method updates the YouTube asset with the given key.
Parameters:
- `key` - The key of the asset.
Returns:
- The remote URL and duration of the updated asset.
## File: api_db.py
This file contains functions for managing API keys.
### Functions:
#### `get_api_key(name)`
This function retrieves the API key with the given name.
Parameters:
- `name` - The name of the API key.
Returns:
- The API key.
#### `set_api_key(name, value)`
This function sets the API key with the given name to the specified value.
Parameters:
- `name` - The name of the API key.
- `value` - The value of the API key.
## File: languages.py
This file contains an enumeration class `Language` that represents different languages.
### Enum: Language
This enumeration class represents different languages and provides a list of supported languages.
Supported Languages:
- ENGLISH
- SPANISH
- FRENCH
- ARABIC
- GERMAN
- POLISH
- ITALIAN
- PORTUGUESE
## File: path_utils.py
This file contains utility functions for searching for program paths.
### Functions:
#### `search_program(program_name)`
This function searches for the specified program and returns its path.
Parameters:
- `program_name` - The name of the program to search for.
Returns:
- The path of the program, or None if the program is not found.
#### `get_program_path(program_name)`
This function retrieves the path of the specified program.
Parameters:
- `program_name` - The name of the program.
Returns:
- The path of the program, or None if the program is not found.
================================================
FILE: shortGPT/config/__init__.py
================================================
from . import config
================================================
FILE: shortGPT/config/api_db.py
================================================
import enum
import os
from shortGPT.database.db_document import TinyMongoDocument
from dotenv import load_dotenv
load_dotenv('./.env')
class ApiProvider(enum.Enum):
    """API providers; each value is the key name used to look up that provider's API key."""
    OPENAI = "OPENAI_API_KEY"
    GEMINI = "GEMINI_API_KEY"
    ELEVEN_LABS = "ELEVENLABS_API_KEY"
    PEXELS = "PEXELS_API_KEY"
class ApiKeyManager:
    """Reads and stores API keys, backed by a database document with an
    environment-variable fallback for reads."""

    api_key_doc_manager = TinyMongoDocument("api_db", "api_keys", "key_doc", create=True)

    @classmethod
    def get_api_key(cls, key: str | ApiProvider):
        """Return the API key stored under ``key``.

        The database is consulted first. If it holds no truthy value, the
        environment variable named after ``key`` (spaces replaced with
        underscores, upper-cased) is used. Returns ``""`` when neither
        source has a value.
        """
        name = key.value if isinstance(key, ApiProvider) else key
        stored_value = cls.api_key_doc_manager._get(name)
        if stored_value:
            return stored_value
        env_name = name.replace(" ", "_").upper()
        return os.environ.get(env_name) or ""

    @classmethod
    def set_api_key(cls, key: str | ApiProvider, value: str):
        """Save ``value`` under ``key`` in the database and return the result of the save."""
        name = key.value if isinstance(key, ApiProvider) else key
        return cls.api_key_doc_manager._save({name: value})
================================================
FILE: shortGPT/config/asset_db.py
================================================
import base64
import re
import shutil
import time
from datetime import datetime
from pathlib import Path
import enum
import pandas as pd
from shortGPT.audio.audio_utils import downloadYoutubeAudio, get_asset_duration
from shortGPT.database.db_document import TinyMongoDocument
# File extensions used to classify a local file as audio, image or video when
# it is added to the asset database from a path.
AUDIO_EXTENSIONS = {".mp3", ".m4a", ".wav", ".flac", ".aac", ".ogg", ".wma", ".opus"}
IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".svg", ".webp"}
VIDEO_EXTENSIONS = {".mp4", ".mkv", ".flv", ".avi", ".mov", ".wmv", ".webm", ".m4v"}
# Template database copied to ASSETS_DB_PATH when the latter does not exist yet.
TEMPLATE_ASSETS_DB_PATH = '.database/template_asset_db.json'
ASSETS_DB_PATH = '.database/asset_db.json'
class AssetType(enum.Enum):
    """Kinds of assets; the value is the string stored in an asset's ``type`` field."""
    VIDEO = "video"
    AUDIO = "audio"
    IMAGE = "image"
    BACKGROUND_MUSIC = "background music"
    BACKGROUND_VIDEO = "background video"
    OTHER = "other"
class AssetDatabase:
    """
    Class for managing assets, both local and remote.
    The class provides methods to add, remove, get and sync assets.
    It uses a MongoDB-like database to store information about the assets.
    """

    # Executed once, when the class is created: seed the asset database from
    # the template if no database file exists yet.
    if not Path(ASSETS_DB_PATH).exists() and Path(TEMPLATE_ASSETS_DB_PATH).exists():
        shutil.copy(TEMPLATE_ASSETS_DB_PATH, ASSETS_DB_PATH)

    local_assets = TinyMongoDocument("asset_db", "asset_collection", "local_assets", create=True)
    remote_assets = TinyMongoDocument("asset_db", "asset_collection", "remote_assets", create=True)
    # Make sure the default 'subscribe animation' remote asset is registered.
    if not remote_assets._get('subscribe animation'):
        remote_assets._save({
            'subscribe animation': {
                "type": AssetType.VIDEO.value,
                "url": "https://www.youtube.com/watch?v=72WhUT0OM98",
                "ts": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            }
        })

    @classmethod
    def asset_exists(cls, name: str) -> bool:
        """Return True if ``name`` is a known local or remote asset."""
        return name in cls.local_assets._get() or name in cls.remote_assets._get()

    @classmethod
    def add_local_asset(cls, name: str, asset_type: AssetType, path: str):
        """Register a local asset under ``name`` with its type, path and timestamp."""
        cls.local_assets._save({
            name: {
                "type": asset_type.value,
                "path": path,
                "ts": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            }
        })

    @classmethod
    def add_remote_asset(cls, name: str, asset_type: AssetType, url: str):
        """Register a remote asset under ``name`` with its type, url and timestamp."""
        cls.remote_assets._save({
            name: {
                "type": asset_type.value,
                "url": url,
                "ts": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            }
        })

    @classmethod
    def remove_asset(cls, name: str):
        """
        Remove an asset from the database.
        Args:
            name (str): Name of the asset.
        Raises:
            ValueError: If no local or remote asset has that name.
        """
        if name in cls.local_assets._get():
            cls._remove_local_asset(name)
        elif name in cls.remote_assets._get():
            cls.remote_assets._delete(name)
        else:
            raise ValueError(f"Asset '{name}' does not exist in the database.")

    @classmethod
    def get_df(cls, source=None) -> pd.DataFrame:
        """
        Build a DataFrame listing the assets, most recently used first.
        Local assets are synced from disk before the listing is built.
        Args:
            source: ``'local'``, ``'youtube'`` or ``None`` for both.
        Returns:
            pd.DataFrame: Columns ``name``, ``type``, ``link`` and ``source``;
            an empty DataFrame when there is nothing to list.
        """
        cls.sync_local_assets()
        data = []
        if source is None or source == 'local':
            for key, asset in cls.local_assets._get().items():
                data.append({'name': key,
                             'type': asset['type'],
                             'link': asset['path'],
                             'source': 'local',
                             'ts': asset.get('ts')
                             })
        if source is None or source == 'youtube':
            for key, asset in cls.remote_assets._get().items():
                data.append({'name': key,
                             'type': asset['type'],
                             'link': asset['url'],
                             'source': 'youtube' if 'youtube' in asset['url'] else 'internet',
                             'ts': asset.get('ts')
                             })
        df = pd.DataFrame(data)
        if not df.empty:
            # 'ts' is only used for ordering and is not exposed to callers.
            df.sort_values(by='ts', ascending=False, inplace=True)
            return df.drop(columns='ts')
        return df

    @classmethod
    def sync_local_assets(cls):
        """
        Adds every file under the 'public' folder that is not yet registered
        to the local assets of the database.
        """
        local_assets = cls.local_assets._get()
        local_paths = {asset['path'] for asset in local_assets.values()}
        for path in Path('public').rglob('*'):
            if path.is_file() and str(path) not in local_paths:
                cls._add_local_asset_from_path(path)

    @classmethod
    def get_asset_link(cls, key: str) -> str:
        """
        Get the link to an asset.
        Args:
            key (str): Name of the asset.
        Returns:
            str: Link to the asset.
        Raises:
            ValueError: If the asset does not exist.
        """
        if key in cls.local_assets._get():
            return cls._update_local_asset_timestamp_and_get_link(key)
        elif key in cls.remote_assets._get():
            return cls._get_remote_asset_link(key)
        else:
            raise ValueError(f"Asset '{key}' does not exist in the database.")

    @classmethod
    def get_asset_duration(cls, key: str) -> str:
        """
        Get the duration of an asset.
        Args:
            key (str): Name of the asset.
        Returns:
            Duration of the asset, as stored or freshly measured.
        Raises:
            ValueError: If the asset does not exist.
        """
        if key in cls.local_assets._get():
            return cls._get_local_asset_duration(key)
        elif key in cls.remote_assets._get():
            return cls._get_remote_asset_duration(key)
        else:
            raise ValueError(f"Asset '{key}' does not exist in the database.")

    @classmethod
    def _remove_local_asset(cls, name: str):
        """
        Remove a local asset from the database.
        The file on disk is deleted too, unless the asset has a 'required' entry.
        Args:
            name (str): Name of the asset.
        """
        asset = cls.local_assets._get(name)
        if 'required' not in asset:
            try:
                Path(asset['path']).unlink()
            except FileNotFoundError as e:
                print(f"File not found: {e}")
        cls.local_assets._delete(name)

    @classmethod
    def _add_local_asset_from_path(cls, path: Path):
        """
        Add a local asset to the database from a file path.
        The asset is named after the file stem and typed by its extension.
        Args:
            path (Path): Path to the asset.
        """
        # Compare case-insensitively so e.g. ".MP4" is recognised as a video.
        file_ext = path.suffix.lower()
        if file_ext in AUDIO_EXTENSIONS:
            asset_type = AssetType.AUDIO
        elif file_ext in IMAGE_EXTENSIONS:
            asset_type = AssetType.IMAGE
        elif file_ext in VIDEO_EXTENSIONS:
            asset_type = AssetType.VIDEO
        else:
            asset_type = AssetType.OTHER
        cls.local_assets._save({
            path.stem: {
                "path": str(path),
                "type": asset_type.value,
                "ts": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            }
        })

    @classmethod
    def _update_local_asset_timestamp_and_get_link(cls, key: str) -> str:
        """
        Update the timestamp of a local asset and get its link.
        Args:
            key (str): Name of the asset.
        Returns:
            str: Link to the asset.
        """
        asset = cls.local_assets._get(key)
        asset['ts'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        cls.local_assets._save({key: asset})
        return asset['path']

    @classmethod
    def _get_remote_asset_link(cls, key: str) -> str:
        """
        Get the link to a remote asset, refreshing its timestamp.
        Args:
            key (str): Name of the asset.
        Returns:
            str: Link to the asset.
        """
        asset = cls.remote_assets._get(key)
        asset['ts'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        cls.remote_assets._save({key: asset})
        if 'youtube' in asset['url']:
            return cls._get_youtube_asset_link(key, asset)
        return asset['url']

    @classmethod
    def _get_local_asset_duration(cls, key: str) -> str:
        """
        Get the duration of a local asset, refreshing its timestamp.
        Args:
            key (str): Name of the asset.
        Returns:
            Duration of the asset; measured and stored first when it is not
            known yet.
        """
        asset = cls.local_assets._get(key)
        asset['ts'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        cls.local_assets._save({key: asset})
        # Measure the duration when it is missing or unset. The previous
        # condition raised KeyError exactly when 'duration' was absent, and
        # never refreshed an unset duration.
        if asset.get('duration') is None:
            _, duration = cls._update_local_asset_duration(key)
            return duration
        return asset['duration']

    @classmethod
    def _get_remote_asset_duration(cls, key: str) -> str:
        """
        Get the duration of a remote asset, refreshing its timestamp.
        Args:
            key (str): Name of the asset.
        Returns:
            Duration of the asset; measured and stored first when it is not
            known yet.
        """
        asset = cls.remote_assets._get(key)
        asset['ts'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        cls.remote_assets._save({key: asset})
        if 'duration' in asset and asset['duration'] is not None:
            return asset['duration']
        _, duration = cls._update_youtube_asset_duration(key)
        return duration

    @classmethod
    def _update_local_asset_duration(cls, key: str) -> str:
        """
        Update the duration of a local asset.
        Args:
            key (str): Name of the asset.
        Returns:
            A ``(path, duration)`` tuple; ``duration`` is None for assets
            whose type is not audio, video or music.
        """
        asset = cls.local_assets._get(key)
        path = Path(asset['path'])
        if any(t in asset['type'] for t in ['audio', 'video', 'music']):
            _, duration = get_asset_duration(str(path))
            asset['duration'] = duration
        else:
            duration = None
        cls.local_assets._save({key: asset})
        return str(path), duration

    @classmethod
    def _update_youtube_asset_duration(cls, key: str) -> str:
        """
        Update the duration of a Youtube asset.
        The resolved stream url is stored base64-encoded next to the duration.
        Args:
            key (str): Name of the asset.
        Returns:
            A ``(remote_url, duration)`` tuple.
        """
        asset = cls.remote_assets._get(key)
        youtube_url = asset['url']
        remote_url, duration = get_asset_duration(youtube_url, isVideo="video" in asset['type'])
        asset.update({
            "remote_url": base64.b64encode(remote_url.encode()).decode('utf-8'),
            "duration": duration,
        })
        cls.remote_assets._save({key: asset})
        return remote_url, duration

    @classmethod
    def _get_youtube_asset_link(cls, key: str, asset: dict) -> str:
        """
        Get the link to a Youtube asset.
        Audio/music assets are downloaded to 'public/<key>.wav' and registered
        as local assets. For other assets the stored stream url is reused
        while its 'expire' timestamp is more than 30 minutes away; otherwise
        it is resolved again.
        Args:
            key (str): Name of the asset.
            asset (dict): Asset data.
        Returns:
            str: Link to the asset.
        """
        if any(t in asset['type'] for t in ['audio', 'music']):
            local_audio_file, duration = downloadYoutubeAudio(asset['url'], f"public/{key}.wav")
            cls.local_assets._save({
                key: {
                    'path': local_audio_file,
                    'duration': duration,
                    'type': 'audio',
                    'ts': datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                }
            })
            return local_audio_file
        if 'remote_url' in asset:
            asset['remote_url'] = base64.b64decode(asset['remote_url']).decode('utf-8')
            expire_timestamp_match = re.search(r"expire=(\d+)", asset['remote_url'])
            not_expired = expire_timestamp_match and int(expire_timestamp_match.group(1)) > time.time() + 1800
            if not_expired and asset.get('duration') is not None:
                return asset['remote_url']
        remote_url, _ = cls._update_youtube_asset_duration(key)
        return remote_url
================================================
FILE: shortGPT/config/config.py
================================================
import yaml
import os
from dotenv import load_dotenv
# Load a .env file (if any) so the lookups below can see its values.
load_dotenv()
# API credentials read once from the environment at import time; each is None
# when the variable is not set.
# NOTE(review): ApiProvider.ELEVEN_LABS in api_db.py uses the name
# "ELEVENLABS_API_KEY" while this reads 'ELEVEN_LABS_API_KEY' — confirm which
# variable name is intended.
ELEVEN_LABS_KEY = os.getenv('ELEVEN_LABS_API_KEY')
OPENAI_KEY = os.getenv('OPENAI_API_KEY')
PLAY_HT_USERID = os.getenv('PLAY_HT_USERID')
PLAY_HT_API_KEY = os.getenv('PLAY_HT_API_KEY')
def read_yaml_config(file_path: str) -> dict:
    """Load the YAML file at ``file_path`` and return its parsed contents."""
    with open(file_path, 'r') as yaml_file:
        return yaml.safe_load(yaml_file)
def write_yaml_config(file_path: str, data: dict):
    """Serialize ``data`` as YAML into the file at ``file_path``, overwriting it."""
    with open(file_path, 'w') as yaml_file:
        yaml.dump(data, yaml_file)
def load_editing_assets() -> dict:
    """Sync the ``local-assets`` section of ``public.yaml`` with the ``public`` folder.

    Entries whose file no longer exists are removed, and every file under
    ``public`` whose path is not already listed is added under its file name.
    The updated configuration is written back to ``public.yaml``.

    Returns:
        The updated YAML configuration as a dictionary.
    """
    # An empty YAML file parses to None, and the section may be missing or
    # null. Normalise both so the sync below always works on a dict.
    yaml_config = read_yaml_config("public.yaml") or {}
    if yaml_config.get('local-assets') is None:
        yaml_config['local-assets'] = {}
    local_assets = yaml_config['local-assets']

    known_paths = set()
    # Iterate over a snapshot because stale entries are deleted on the way.
    for key, asset in list(local_assets.items()):
        # An entry is either the path itself or a mapping with a 'path' key.
        file_path = asset if isinstance(asset, str) else asset['path']
        if os.path.exists(file_path):
            known_paths.add(file_path)
        else:
            del local_assets[key]

    for foldername, _subfolders, filenames in os.walk('public'):
        for filename in filenames:
            # Normalise Windows separators so paths compare equal across OSes.
            file_path = os.path.join(foldername, filename).replace("\\", "/")
            if file_path not in known_paths:
                local_assets[filename] = file_path

    write_yaml_config("public.yaml", yaml_config)
    return yaml_config
# print(load_editing_assets())
# print(read_yaml_config("editing_assets.yaml")['local-assets'])
================================================
FILE: shortGPT/config/languages.py
================================================
from enum import Enum
class Language(Enum):
    """Enumeration of languages; each member's value is the language's English name."""

    ENGLISH = "English"
    SPANISH = "Spanish"
    FRENCH = "French"
    ARABIC = "Arabic"
    GERMAN = "German"
    POLISH = "Polish"
    ITALIAN = "Italian"
    PORTUGUESE = "Portuguese"
    AFRIKAANS = "Afrikaans"
    AMHARIC = "Amharic"
    AZERBAIJANI = "Azerbaijani"
    BULGARIAN = "Bulgarian"
    BENGALI = "Bengali"
    BOSNIAN = "Bosnian"
    CATALAN = "Catalan"
    CZECH = "Czech"
    WELSH = "Welsh"
    DANISH = "Danish"
    GREEK = "Greek"
    ESTONIAN = "Estonian"
    PERSIAN = "Persian"
    FINNISH = "Finnish"
    FILIPINO = "Filipino"
    GALICIAN = "Galician"
    GUJARATI = "Gujarati"
    HEBREW = "Hebrew"
    HINDI = "Hindi"
    CROATIAN = "Croatian"
    HUNGARIAN = "Hungarian"
    INDONESIAN = "Indonesian"
    ICELANDIC = "Icelandic"
    JAPANESE = "Japanese"
    JAVANESE = "Javanese"
    GEORGIAN = "Georgian"
    KAZAKH = "Kazakh"
    KHMER = "Khmer"
    KANNADA = "Kannada"
    KOREAN = "Korean"
    LAO = "Lao"
    LITHUANIAN = "Lithuanian"
    LATVIAN = "Latvian"
    MACEDONIAN = "Macedonian"
    MALAYALAM = "Malayalam"
    MONGOLIAN = "Mongolian"
    MARATHI = "Marathi"
    MALAY = "Malay"
    MALTESE = "Maltese"
    MYANMAR = "Myanmar"
    NORWEGIAN = "Norwegian"
    NEPALI = "Nepali"
    DUTCH = "Dutch"
    NORWEGIAN_BOKMAL = "Norwegian Bokmål"
    NORWEGIAN_NYNORSK = "Norwegian Nynorsk"
    PASHTO = "Pashto"
    ROMANIAN = "Romanian"
    RUSSIAN = "Russian"
    SINHALA = "Sinhala"
    SLOVAK = "Slovak"
    SLOVENIAN = "Slovenian"
    SOMALI = "Somali"
    ALBANIAN = "Albanian"
    SERBIAN = "Serbian"
    SUNDANESE = "Sundanese"
    SWEDISH = "Swedish"
    SWAHILI = "Swahili"
    TAMIL = "Tamil"
    TELUGU = "Telugu"
    THAI = "Thai"
    TURKISH = "Turkish"
    UKRAINIAN = "Ukrainian"
    URDU = "Urdu"
    UZBEK = "Uzbek"
    VIETNAMESE = "Vietnamese"
    CHINESE = "Chinese"
    ZULU = "Zulu"
# Subset of Language members; presumably the languages usable with the
# ElevenLabs voice backend — confirm against the voice module that reads it.
ELEVEN_SUPPORTED_LANGUAGES=[Language.ENGLISH,
    Language.SPANISH,
    Language.FRENCH,
    Language.ARABIC,
    Language.GERMAN,
    Language.POLISH,
    Language.ITALIAN,
    Language.PORTUGUESE]
# Maps every Language member to a short language code (e.g. "en", "fil").
LANGUAGE_ACRONYM_MAPPING={
    Language.ENGLISH : "en",
    Language.SPANISH : "es",
    Language.FRENCH : "fr",
    Language.ARABIC : "ar",
    Language.GERMAN : "de",
    Language.POLISH : "pl",
    Language.ITALIAN : "it",
    Language.PORTUGUESE : "pt",
    Language.AFRIKAANS : "af",
    Language.AMHARIC : "am",
    Language.AZERBAIJANI : "az",
    Language.BULGARIAN : "bg",
    Language.BENGALI : "bn",
    Language.BOSNIAN : "bs",
    Language.CATALAN : "ca",
    Language.CZECH : "cs",
    Language.WELSH : "cy",
    Language.DANISH : "da",
    Language.GREEK : "el",
    Language.ESTONIAN : "et",
    Language.PERSIAN : "fa",
    Language.FINNISH : "fi",
    Language.FILIPINO : "fil",
    Language.GALICIAN : "gl",
    Language.GUJARATI : "gu",
    Language.HEBREW : "he",
    Language.HINDI : "hi",
    Language.CROATIAN : "hr",
    Language.HUNGARIAN : "hu",
    Language.INDONESIAN : "id",
    Language.ICELANDIC : "is",
    Language.JAPANESE : "ja",
    Language.JAVANESE : "jv",
    Language.GEORGIAN : "ka",
    Language.KAZAKH : "kk",
    Language.KHMER : "km",
    Language.KANNADA : "kn",
    Language.KOREAN : "ko",
    Language.LAO : "lo",
    Language.LITHUANIAN : "lt",
    Language.LATVIAN : "lv",
    Language.MACEDONIAN : "mk",
    Language.MALAYALAM : "ml",
    Language.MONGOLIAN : "mn",
    Language.MARATHI : "mr",
    Language.MALAY : "ms",
    Language.MALTESE : "mt",
    Language.MYANMAR : "my",
    Language.NORWEGIAN : "no",
    Language.NEPALI : "ne",
    Language.DUTCH : "nl",
    Language.NORWEGIAN_BOKMAL : "nb",
    Language.NORWEGIAN_NYNORSK : "nn",
    Language.PASHTO : "ps",
    Language.ROMANIAN : "ro",
    Language.RUSSIAN : "ru",
    Language.SINHALA : "si",
    Language.SLOVAK : "sk",
    Language.SLOVENIAN : "sl",
    Language.SOMALI : "so",
    Language.ALBANIAN : "sq",
    Language.SERBIAN : "sr",
    Language.SUNDANESE : "su",
    Language.SWEDISH : "sv",
    Language.SWAHILI : "sw",
    Language.TAMIL : "ta",
    Language.TELUGU : "te",
    Language.THAI : "th",
    Language.TURKISH : "tr",
    Language.UKRAINIAN : "uk",
    Language.URDU : "ur",
    Language.UZBEK : "uz",
    Language.VIETNAMESE : "vi",
    Language.CHINESE : "zh",
    Language.ZULU : "zu",
}
# Reverse lookup: short language code -> Language member.
ACRONYM_LANGUAGE_MAPPING = {v: k for k, v in LANGUAGE_ACRONYM_MAPPING.items()}
# Voice identifier per language and gender, keyed by Language member and then
# by 'male' / 'female' (presumably consumed by the Edge TTS voice module).
# Fixes relative to the previous table:
#   * the duplicated Language.ITALIAN entry was removed (it repeated the same value);
#   * Malayalam and Swahili 'female' no longer reuse the male voice;
#   * Lao, Mongolian and Pashto had their male/female voices swapped.
EDGE_TTS_VOICENAME_MAPPING = {
    Language.ENGLISH: {'male': 'en-AU-WilliamNeural', 'female': 'en-AU-NatashaNeural'},
    Language.SPANISH: {'male': 'es-AR-TomasNeural', 'female': 'es-AR-ElenaNeural'},
    Language.FRENCH: {'male': 'fr-CA-AntoineNeural', 'female': 'fr-CA-SylvieNeural'},
    Language.ARABIC: {'male': 'ar-AE-HamdanNeural', 'female': 'ar-AE-FatimaNeural'},
    Language.GERMAN: {'male': 'de-DE-ConradNeural', 'female': 'de-DE-KatjaNeural'},
    Language.POLISH: {'male': 'pl-PL-MarekNeural', 'female': 'pl-PL-ZofiaNeural'},
    Language.ITALIAN: {'male': 'it-IT-DiegoNeural', 'female': 'it-IT-ElsaNeural'},
    Language.PORTUGUESE: {'male': 'pt-BR-AntonioNeural', 'female': 'pt-BR-FranciscaNeural'},
    Language.AFRIKAANS: {'male': 'af-ZA-WillemNeural', 'female': 'af-ZA-AdriNeural'},
    Language.AMHARIC: {'male': 'am-ET-AmehaNeural', 'female': 'am-ET-MekdesNeural'},
    Language.AZERBAIJANI: {'male': 'az-AZ-BabekNeural', 'female': 'az-AZ-BanuNeural'},
    Language.BULGARIAN: {'male': 'bg-BG-BorislavNeural', 'female': 'bg-BG-KalinaNeural'},
    Language.BENGALI: {'male': 'bn-BD-PradeepNeural', 'female': 'bn-BD-NabanitaNeural'},
    Language.BOSNIAN: {'male': 'bs-BA-GoranNeural', 'female': 'bs-BA-VesnaNeural'},
    Language.CATALAN: {'male': 'ca-ES-EnricNeural', 'female': 'ca-ES-JoanaNeural'},
    Language.CZECH: {'male': 'cs-CZ-AntoninNeural', 'female': 'cs-CZ-VlastaNeural'},
    Language.WELSH: {'male': 'cy-GB-AledNeural', 'female': 'cy-GB-NiaNeural'},
    Language.DANISH: {'male': 'da-DK-JeppeNeural', 'female': 'da-DK-ChristelNeural'},
    Language.GREEK: {'male': 'el-GR-NestorasNeural', 'female': 'el-GR-AthinaNeural'},
    Language.ESTONIAN: {'male': 'et-EE-KertNeural', 'female': 'et-EE-AnuNeural'},
    Language.PERSIAN: {'male': 'fa-IR-FaridNeural', 'female': 'fa-IR-DilaraNeural'},
    Language.FINNISH: {'male': 'fi-FI-HarriNeural', 'female': 'fi-FI-NooraNeural'},
    Language.FILIPINO: {'male': 'fil-PH-AngeloNeural', 'female': 'fil-PH-BlessicaNeural'},
    Language.GALICIAN: {'male': 'gl-ES-RoiNeural', 'female': 'gl-ES-SabelaNeural'},
    Language.GUJARATI: {'male': 'gu-IN-NiranjanNeural', 'female': 'gu-IN-DhwaniNeural'},
    Language.HEBREW: {'male': 'he-IL-AvriNeural', 'female': 'he-IL-HilaNeural'},
    Language.HINDI: {'male': 'hi-IN-MadhurNeural', 'female': 'hi-IN-SwaraNeural'},
    Language.CROATIAN: {'male': 'hr-HR-SreckoNeural', 'female': 'hr-HR-GabrijelaNeural'},
    Language.HUNGARIAN: {'male': 'hu-HU-TamasNeural', 'female': 'hu-HU-NoemiNeural'},
    Language.INDONESIAN: {'male': 'id-ID-ArdiNeural', 'female': 'id-ID-GadisNeural'},
    Language.ICELANDIC: {'male': 'is-IS-GunnarNeural', 'female': 'is-IS-GudrunNeural'},
    Language.JAPANESE: {'male': 'ja-JP-KeitaNeural', 'female': 'ja-JP-NanamiNeural'},
    Language.JAVANESE: {'male': 'jv-ID-DimasNeural', 'female': 'jv-ID-SitiNeural'},
    Language.GEORGIAN: {'male': 'ka-GE-GiorgiNeural', 'female': 'ka-GE-EkaNeural'},
    Language.KAZAKH: {'male': 'kk-KZ-DauletNeural', 'female': 'kk-KZ-AigulNeural'},
    Language.KHMER: {'male': 'km-KH-PisethNeural', 'female': 'km-KH-SreymomNeural'},
    Language.KANNADA: {'male': 'kn-IN-GaganNeural', 'female': 'kn-IN-SapnaNeural'},
    Language.KOREAN: {'male': 'ko-KR-InJoonNeural', 'female': 'ko-KR-SunHiNeural'},
    # Chanthavong is the male Lao voice, Keomany the female one.
    Language.LAO: {'male': 'lo-LA-ChanthavongNeural', 'female': 'lo-LA-KeomanyNeural'},
    Language.LITHUANIAN: {'male': 'lt-LT-LeonasNeural', 'female': 'lt-LT-OnaNeural'},
    Language.LATVIAN: {'male': 'lv-LV-NilsNeural', 'female': 'lv-LV-EveritaNeural'},
    Language.MACEDONIAN: {'male': 'mk-MK-AleksandarNeural', 'female': 'mk-MK-MarijaNeural'},
    Language.MALAYALAM: {'male': 'ml-IN-MidhunNeural', 'female': 'ml-IN-SobhanaNeural'},
    # Bataa is the male Mongolian voice, Yesui the female one.
    Language.MONGOLIAN: {'male': 'mn-MN-BataaNeural', 'female': 'mn-MN-YesuiNeural'},
    Language.MARATHI: {'male': 'mr-IN-ManoharNeural', 'female': 'mr-IN-AarohiNeural'},
    Language.MALAY: {'male': 'ms-MY-OsmanNeural', 'female': 'ms-MY-YasminNeural'},
    Language.MALTESE: {'male': 'mt-MT-JosephNeural', 'female': 'mt-MT-GraceNeural'},
    Language.MYANMAR: {'male': 'my-MM-ThihaNeural', 'female': 'my-MM-NilarNeural'},
    Language.NORWEGIAN: {'male': 'nb-NO-FinnNeural', 'female': 'nb-NO-PernilleNeural'},
    Language.NEPALI: {'male': 'ne-NP-SagarNeural', 'female': 'ne-NP-HemkalaNeural'},
    Language.DUTCH: {'male': 'nl-NL-MaartenNeural', 'female': 'nl-NL-FennaNeural'},
    Language.NORWEGIAN_BOKMAL: {'male': 'nb-NO-FinnNeural', 'female': 'nb-NO-PernilleNeural'},
    # NOTE(review): Nynorsk reuses the Bokmål (nb-NO) voices, as in the original table.
    Language.NORWEGIAN_NYNORSK: {'male': 'nb-NO-FinnNeural', 'female': 'nb-NO-PernilleNeural'},
    # GulNawaz is the male Pashto voice, Latifa the female one.
    Language.PASHTO: {'male': 'ps-AF-GulNawazNeural', 'female': 'ps-AF-LatifaNeural'},
    Language.ROMANIAN: {'male': 'ro-RO-EmilNeural', 'female': 'ro-RO-AlinaNeural'},
    Language.RUSSIAN: {'male': 'ru-RU-DmitryNeural', 'female': 'ru-RU-SvetlanaNeural'},
    Language.SINHALA: {'male': 'si-LK-SameeraNeural', 'female': 'si-LK-ThiliniNeural'},
    Language.SLOVAK: {'male': 'sk-SK-LukasNeural', 'female': 'sk-SK-ViktoriaNeural'},
    Language.SLOVENIAN: {'male': 'sl-SI-RokNeural', 'female': 'sl-SI-PetraNeural'},
    Language.SOMALI: {'male': 'so-SO-MuuseNeural', 'female': 'so-SO-UbaxNeural'},
    Language.ALBANIAN: {'male': 'sq-AL-IlirNeural', 'female': 'sq-AL-AnilaNeural'},
    Language.SERBIAN: {'male': 'sr-RS-NicholasNeural', 'female': 'sr-RS-SophieNeural'},
    Language.SUNDANESE: {'male': 'su-ID-JajangNeural', 'female': 'su-ID-TutiNeural'},
    Language.SWEDISH: {'male': 'sv-SE-MattiasNeural', 'female': 'sv-SE-SofieNeural'},
    Language.SWAHILI: {'male': 'sw-TZ-DaudiNeural', 'female': 'sw-TZ-RehemaNeural'},
    Language.TAMIL: {'male': 'ta-IN-ValluvarNeural', 'female': 'ta-IN-PallaviNeural'},
    Language.TELUGU: {'male': 'te-IN-MohanNeural', 'female': 'te-IN-ShrutiNeural'},
    Language.THAI: {'male': 'th-TH-NiwatNeural', 'female': 'th-TH-PremwadeeNeural'},
    Language.TURKISH: {'male': 'tr-TR-AhmetNeural', 'female': 'tr-TR-EmelNeural'},
    Language.UKRAINIAN: {'male': 'uk-UA-OstapNeural', 'female': 'uk-UA-PolinaNeural'},
    Language.URDU: {'male': 'ur-PK-AsadNeural', 'female': 'ur-PK-UzmaNeural'},
    Language.UZBEK: {'male': 'uz-UZ-SardorNeural', 'female': 'uz-UZ-MadinaNeural'},
    Language.VIETNAMESE: {'male': 'vi-VN-NamMinhNeural', 'female': 'vi-VN-HoaiMyNeural'},
    Language.CHINESE: {'male': 'zh-CN-YunxiNeural', 'female': 'zh-CN-XiaoxiaoNeural'},
    Language.ZULU: {'male': 'zu-ZA-ThembaNeural', 'female': 'zu-ZA-ThandoNeural'},
}
================================================
FILE: shortGPT/config/path_utils.py
================================================
import os
import platform
import sys
import subprocess
import subprocess
import tempfile
def search_program(program_name):
    """Locate an executable on the system ``PATH``.

    Args:
        program_name: Name of the program to look for (e.g. ``"ffmpeg"``).

    Returns:
        The path of the first matching executable, or ``None`` when the
        program cannot be found.
    """
    # Imported locally so this function does not depend on the module's import block.
    import shutil

    # shutil.which performs the lookup in-process: unlike spawning `which` /
    # `where`, it cannot fail because the lookup tool itself is missing, and it
    # returns a single path even when several executables match.
    return shutil.which(program_name)
def get_program_path(program_name):
    """Return whatever ``search_program`` reports for ``program_name``."""
    return search_program(program_name)
def is_running_in_colab():
    """Return True when the ``COLAB_GPU`` environment variable is defined."""
    # Only the presence of the variable matters, not its value.
    return os.environ.get('COLAB_GPU') is not None
def handle_path(path, extension = ".mp4"):
    """Return a usable location for ``path``, fetching remote media when in Colab.

    A path that does not contain ``'https'``, and any path when not running in
    Colab, is returned unchanged. In Colab, a path containing ``'https'`` is
    copied with ffmpeg into a new temporary file and that file's name is
    returned; the file is not deleted automatically.

    Args:
        path: Local path or URL of the media.
        extension: Suffix given to the temporary file.

    Returns:
        ``path`` itself, or the name of the temporary file holding the copy.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    if 'https' not in path or not is_running_in_colab():
        return path
    # Reserve a unique file name, then release our handle so that ffmpeg is the
    # only writer of the file.
    with tempfile.NamedTemporaryFile(suffix=extension, delete=False) as temp_file:
        temp_path = temp_file.name
    # The '-y' option overwrites the output file if it already exists.
    command = ['ffmpeg', '-y', '-i', path, temp_path]
    try:
        subprocess.run(command, check=True)
    except BaseException:
        # Do not leave an empty or partial temporary file behind on failure.
        if os.path.exists(temp_path):
            os.remove(temp_path)
        raise
    return temp_path
================================================
FILE: shortGPT/database/README.md
================================================
# Database Module Documentation
The `database` module provides classes for managing database documents and data in the ShortGPT application. The module consists of three files:
- `content_data_manager.py`: Defines the `ContentDataManager` class, which manages the content data for a document in the database.
- `content_database.py`: Defines the `ContentDatabase` class, which provides methods for creating and accessing `ContentDataManager` instances.
- `db_document.py`: Defines the `AbstractDatabaseDocument` abstract base class and the `TinyMongoDocument` class, which represents a document in a TinyMongo database.
## File: content_data_manager.py
The `content_data_manager.py` file contains the `ContentDataManager` class, which is responsible for managing the content data for a document in the database.
### Class: ContentDataManager
#### `__init__(self, db_doc: AbstractDatabaseDocument, content_type: str, new=False)`
- Initializes a new instance of the `ContentDataManager` class.
- Parameters:
- `db_doc`: The `AbstractDatabaseDocument` instance representing the document in the database.
- `content_type`: The type of content to be managed by the `ContentDataManager`.
- `new`: (Optional) A boolean flag indicating whether the document is new or existing. Default is `False`.
#### `save(self, key, value)`
- Saves the specified key-value pair to the document.
- Parameters:
- `key`: The key of the data to be saved.
- `value`: The value of the data to be saved.
#### `get(self, key)`
- Retrieves the value associated with the specified key from the document.
- Parameters:
- `key`: The key of the data to be retrieved.
- Returns:
- The value associated with the specified key.
#### `_getId(self)`
- Retrieves the ID of the document.
- Returns:
- The ID of the document.
#### `delete(self)`
- Deletes the document from the database.
#### `__str__(self)`
- Returns a string representation of the document.
## File: content_database.py
The `content_database.py` file contains the `ContentDatabase` class, which provides methods for creating and accessing `ContentDataManager` instances.
### Class: ContentDatabase
#### `instanciateContentDataManager(self, id: str, content_type: str, new=False)`
- Creates a new `ContentDataManager` instance for the specified document ID and content type.
- Parameters:
- `id`: The ID of the document.
- `content_type`: The type of content to be managed by the `ContentDataManager`.
- `new`: (Optional) A boolean flag indicating whether the document is new or existing. Default is `False`.
- Returns:
- A new `ContentDataManager` instance.
#### `getContentDataManager(self, id, content_type: str)`
- Retrieves an existing `ContentDataManager` instance for the specified document ID and content type.
- Parameters:
- `id`: The ID of the document.
- `content_type`: The type of content to be managed by the `ContentDataManager`.
- Returns:
- The existing `ContentDataManager` instance, or `None` if not found.
#### `createContentDataManager(self, content_type: str) -> ContentDataManager`
- Creates a new `ContentDataManager` instance for a new document with the specified content type.
- Parameters:
- `content_type`: The type of content to be managed by the `ContentDataManager`.
- Returns:
- A new `ContentDataManager` instance.
## File: db_document.py
The `db_document.py` file contains the `AbstractDatabaseDocument` abstract base class and the `TinyMongoDocument` class, which represents a document in a TinyMongo database.
### Abstract Class: AbstractDatabaseDocument
- An abstract base class that defines the interface for a database document.
- Subclasses must implement the abstract methods:
- `_save(self, key, data)`
- `_get(self, key)`
- `_getId(self)`
- `__str__(self)`
- `_delete(self)`
### Class: TinyMongoDocument
- Represents a document in a TinyMongo database.
- Inherits from the `DatabaseDocument` abstract base class.
#### `__init__(self, db_name: str, collection_name: str, document_id: str, create=False)`
- Initializes a new instance of the `TinyMongoDocument` class.
- Parameters:
- `db_name`: The name of the database.
- `collection_name`: The name of the collection.
- `document_id`: The ID of the document.
- `create`: (Optional) A boolean flag indicating whether to create the document if it doesn't exist. Default is `False`.
#### `exists(self)`
- Checks if the document exists in the database.
- Returns:
- `True` if the document exists, `False` otherwise.
#### `_save(self, data)`
- Saves the specified data to the document.
- Parameters:
- `data`: The data to be saved.
#### `_get(self, key=None)`
- Retrieves the value associated with the specified key from the document.
- Parameters:
- `key`: (Optional) The key of the data to be retrieved. If not specified, returns the entire document.
- Returns:
- The value associated with the specified key, or the entire document if no key is specified.
#### `_delete(self, key)`
- Deletes the specified key from the document.
- Parameters:
- `key`: The key to be deleted.
#### `_getId(self)`
- Retrieves the ID of the document.
- Returns:
- The ID of the document.
#### `__str__(self)`
- Returns a string representation of the document.
================================================
FILE: shortGPT/database/__init__.py
================================================
================================================
FILE: shortGPT/database/content_data_manager.py
================================================
from shortGPT.database.db_document import AbstractDatabaseDocument
class ContentDataManager():
    """Key/value access to one content document stored in the database.

    Every operation is delegated to the wrapped database document.
    """

    def __init__(self, db_doc: AbstractDatabaseDocument, content_type: str, new=False):
        """Wrap ``db_doc``; when ``new`` is true, write the initial bookkeeping fields.

        Args:
            db_doc: Database document that holds the content data.
            content_type: Type of content stored in the document.
            new: When true, the document is initialised with its content type,
                ``ready_to_upload=False`` and ``last_completed_step=0``.
        """
        self.contentType = content_type
        self.db_doc = db_doc
        if new:
            self.db_doc._save({
                'content_type': content_type,
                'ready_to_upload': False,
                'last_completed_step': 0,
            })

    def save(self, key, value):
        """Store ``value`` under ``key`` in the document."""
        self.db_doc._save({key: value})

    def get(self, key):
        """Return the value the document holds for ``key``."""
        return self.db_doc._get(key)

    def _getId(self):
        """Return the identifier of the wrapped document."""
        return self.db_doc._getId()

    def delete(self):
        """Delete the wrapped document."""
        # The document interface exposes `_delete`; there is no public `delete`
        # method, so the previous `self.db_doc.delete()` raised AttributeError.
        self.db_doc._delete()

    def __str__(self):
        return self.db_doc.__str__()
================================================
FILE: shortGPT/database/content_database.py
================================================
from uuid import uuid4
from shortGPT.database.db_document import TINY_MONGO_DATABASE, TinyMongoDocument
from shortGPT.database.content_data_manager import ContentDataManager
class ContentDatabase:
    """Creates and retrieves ``ContentDataManager`` objects.

    All documents live in the ``content_documents`` collection of the
    ``content_db`` database.
    """

    def __init__(self, ):
        self.content_collection = TINY_MONGO_DATABASE["content_db"]["content_documents"]

    def instanciateContentDataManager(self, id: str, content_type: str, new=False):
        """Wrap the existing document ``id`` in a ``ContentDataManager``.

        Unlike ``getContentDataManager``, errors raised while opening the
        document propagate to the caller.
        """
        db_doc = TinyMongoDocument("content_db", "content_documents", id)
        return ContentDataManager(db_doc, content_type, new)

    def getContentDataManager(self, id, content_type: str):
        """Return a manager for the existing document ``id``, or ``None`` on failure."""
        try:
            db_doc = TinyMongoDocument("content_db", "content_documents", id)
            return ContentDataManager(db_doc, content_type, False)
        # `Exception` rather than a bare `except`, so KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        except Exception:
            return None

    def createContentDataManager(self, content_type: str) -> ContentDataManager:
        """Create a new document with a random 24-character id and return its manager.

        Returns ``None`` when the document cannot be created.
        """
        try:
            new_short_id = uuid4().hex[:24]
            db_doc = TinyMongoDocument("content_db", "content_documents", new_short_id, True)
            return ContentDataManager(db_doc, content_type, True)
        except Exception:
            return None
================================================
FILE: shortGPT/database/db_document.py
================================================
import threading
from abc import ABC, abstractmethod
import tinydb
import tinymongo as tm
class AbstractDatabaseDocument(ABC):
    """Interface that every database document wrapper has to implement."""

    # NOTE(review): TinyMongoDocument implements `_save(self, data)` and is
    # called with a single dict; confirm which signature is intended.
    @abstractmethod
    def _save(self, key, data):
        """Persist data in the database."""

    @abstractmethod
    def _get(self, key):
        """Fetch data from the database."""

    @abstractmethod
    def _getId(self):
        """Return the identifier of the document."""

    @abstractmethod
    def __str__(self):
        """Return a string representation of the document."""

    @abstractmethod
    def _delete(self):
        """Delete the document."""
class TinyMongoClient(tm.TinyMongoClient):
    """tinymongo client subclass that overrides the ``_storage`` property."""

    @property
    def _storage(self):
        # Return TinyDB's JSONStorage class as the storage backend.
        return tinydb.storages.JSONStorage


# Module-level client shared by the document wrappers; it is created on the
# "./.database" folder as soon as this module is imported.
TINY_MONGO_DATABASE = TinyMongoClient("./.database")
class TinyMongoDocument(AbstractDatabaseDocument):
    """A single document of a TinyMongo collection, identified by its ``_id``.

    Every database access is serialised through one class-level lock.
    """

    # Re-entrant on purpose: `_save` calls `_get` while it already holds the
    # lock, which deadlocks with a plain `threading.Lock`.
    _lock = threading.RLock()

    def __init__(self, db_name: str, collection_name: str, document_id: str, create=False):
        """Bind to document ``document_id`` of ``db_name``/``collection_name``.

        Args:
            db_name: Name of the database.
            collection_name: Name of the collection.
            document_id: Value of the document's ``_id`` field.
            create: When true, a missing document is inserted instead of
                raising.

        Raises:
            Exception: If the document does not exist and ``create`` is false.
        """
        self.collection = TINY_MONGO_DATABASE[db_name][collection_name]
        self.collection_name = collection_name
        self.document_id = document_id
        if (not self.exists()):
            if create:
                self.collection.insert_one({"_id": document_id})
            else:
                raise Exception(f"The document with id {document_id} in collection {collection_name} of database {db_name} does not exist")

    def exists(self):
        """Return True when exactly one document with this id is in the collection."""
        with self._lock:
            return self.collection.find({"_id": self.document_id}).count() == 1

    def _save(self, data):
        """Write every ``key: value`` pair of ``data`` into the document.

        A dotted key such as ``"a.b"`` updates entry ``b`` of the dictionary
        stored under ``a`` (created when missing or not a dictionary). Errors
        are printed, not raised.
        """
        with self._lock:
            try:
                update_data = {'$set': {}}
                for key, value in data.items():
                    path_parts = key.split(".")
                    if len(path_parts) > 1:
                        root_key = ".".join(path_parts[:-1])
                        last_key = path_parts[-1]
                        # Read-modify-write of the parent dictionary.
                        current_value = self._get(root_key)
                        if not isinstance(current_value, dict):
                            current_value = {}
                        current_value[last_key] = value
                        update_data['$set'][root_key] = current_value
                    else:
                        update_data['$set'][key] = value
                self.collection.update_one({'_id': self.document_id}, update_data)
            except Exception as e:
                print(f"Error saving data: {e}")

    def _get(self, key=None):
        """Return the value stored under ``key`` (dotted paths allowed).

        Without a key the whole document, minus ``_id``, is returned. Any
        lookup failure yields ``None``.
        """
        with self._lock:
            try:
                document = self.collection.find_one({'_id': self.document_id})
                if not key:
                    del document['_id']
                    return document
                keys = key.split(".")
                value = document[keys[0]]
                for k in keys[1:]:
                    value = value[k]
                return value
            except Exception:
                # Best effort: a missing key or document is reported as None.
                return None

    def _delete(self, key=None):
        """Delete ``key`` from the document, or the whole document when ``key`` is None.

        Accepting no argument matches the abstract ``_delete(self)`` signature.
        Errors are printed, not raised.
        """
        with self._lock:
            try:
                if key is None:
                    self.collection.remove({'_id': self.document_id})
                    return
                document = self.collection.find_one({'_id': self.document_id})
                if key in document:
                    del document[key]
                    # Replace the stored document with the copy lacking `key`.
                    self.collection.remove({'_id': self.document_id})
                    self.collection.insert(document)
                else:
                    print(f"Key '{key}' not found in the document")
            except Exception as e:
                print(f"Error deleting key '{key}': {e}")

    def _getId(self):
        """Return the document id."""
        return self.document_id

    def __str__(self):
        with self._lock:
            document = self.collection.find_one({'_id': self.document_id})
            return str(document)
================================================
FILE: shortGPT/editing_framework/README.md
================================================
# Editing Framework Module Documentation
The `editing_framework` module provides a set of classes and functions for editing videos and images. This module is part of the `shortGPT` project and is designed to be used with the `CoreEditingEngine` class to generate videos and images based on a specified editing schema.
## Module Files
The `editing_framework` module consists of three files:
1. `rendering_logger.py`: This file contains the `MoviepyProgressLogger` class, which is used for logging the progress of the rendering process.
2. `editing_engine.py`: This file contains the `EditingStep` and `Flow` enums, as well as the `EditingEngine` class, which is the main class for managing the editing process.
3. `core_editing_engine.py`: This file contains the `CoreEditingEngine` class, which is responsible for generating videos and images based on the editing schema.
## `rendering_logger.py`
This file defines the `MoviepyProgressLogger` class, which is a subclass of `ProgressBarLogger` from the `proglog` module. It provides a callback function for logging the progress of the rendering process. The `MoviepyProgressLogger` class has the following methods:
### `__init__(self, callBackFunction=None)`
- Initializes a new instance of the `MoviepyProgressLogger` class.
- Parameters:
- `callBackFunction`: An optional callback function that will be called with the progress string.
### `bars_callback(self, bar, attr, value, old_value=None)`
- This method is called every time the logger progress is updated.
- It calculates the rendering progress and the estimated time left.
- It calls the callback function with the progress string or prints the progress string if no callback function is provided.
- Parameters:
- `bar`: The progress bar name.
- `attr`: The progress attribute name.
- `value`: The current progress value.
- `old_value`: The previous progress value.
### `format_time(self, seconds)`
- Formats the given time in seconds to the format "mm:ss".
- Parameters:
- `seconds`: The time in seconds.
- Returns:
- The formatted time string.
## `editing_engine.py`
This file defines the `EditingStep` and `Flow` enums, as well as the `EditingEngine` class, which is responsible for managing the editing process. The `EditingEngine` class has the following methods:
### `__init__(self)`
- Initializes a new instance of the `EditingEngine` class.
- It initializes the editing step tracker and the editing schema.
### `addEditingStep(self, editingStep: EditingStep, args: Dict[str, any] = {})`
- Adds an editing step to the editing schema with the specified arguments.
- Parameters:
- `editingStep`: The editing step to add.
- `args`: The arguments for the editing step.
- Raises:
- `Exception`: If a required argument is missing.
### `ingestFlow(self, flow: Flow, args)`
- Ingests a flow into the editing schema with the specified arguments.
- Parameters:
- `flow`: The flow to ingest.
- `args`: The arguments for the flow.
- Raises:
- `Exception`: If a required argument is missing.
### `dumpEditingSchema(self)`
- Returns the current editing schema.
### `renderVideo(self, outputPath, logger=None)`
- Renders the video based on the editing schema and saves it to the specified output path.
- Parameters:
- `outputPath`: The path to save the rendered video.
- `logger`: An optional logger object for logging the rendering progress.
### `renderImage(self, outputPath)`
- Renders the image based on the editing schema and saves it to the specified output path.
- Parameters:
- `outputPath`: The path to save the rendered image.
## `core_editing_engine.py`
This file defines the `CoreEditingEngine` class, which is responsible for generating videos and images based on the editing schema. The `CoreEditingEngine` class has the following methods:
### `generate_image(self, schema:Dict[str, Any], output_file)`
- Generates an image based on the editing schema and saves it to the specified output file.
- Parameters:
- `schema`: The editing schema.
- `output_file`: The path to save the generated image.
- Returns:
- The path to the saved image.
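### `generate_video(self, schema:Dict[str, Any], output_file, logger=None, force_duration=None, threads=None)`
- Generates a video based on the editing schema and saves it to the specified output file.
- Parameters:
- `schema`: The editing schema.
- `output_file`: The path to save the generated video.
- `logger`: An optional callback function that receives the rendering progress.
- `force_duration`: (Optional) When set, overrides the duration of the rendered video.
- `threads`: (Optional) The number of threads passed to the video writer.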
- Returns:
- The path to the saved video.
### `process_common_actions(self, clip: Union[VideoFileClip, ImageClip, TextClip, AudioFileClip], actions: List[Dict[str, Any]])`
- Processes common actions for the given clip.
- Parameters:
- `clip`: The clip to process.
- `actions`: The list of actions to apply to the clip.
- Returns:
- The processed clip.
### `process_common_visual_actions(self, clip: Union[VideoFileClip, ImageClip, TextClip], actions: List[Dict[str, Any]])`
- Processes common visual clip actions for the given clip.
- Parameters:
- `clip`: The clip to process.
- `actions`: The list of actions to apply to the clip.
- Returns:
- The processed clip.
### `process_audio_actions(self, clip: AudioFileClip, actions: List[Dict[str, Any]])`
- Processes audio actions for the given audio clip.
- Parameters:
- `clip`: The audio clip to process.
- `actions`: The list of actions to apply to the audio clip.
- Returns:
- The processed audio clip.
### `process_video_asset(self, asset: Dict[str, Any])`
- Processes a video asset based on the asset parameters and actions.
- Parameters:
- `asset`: The video asset to process.
- Returns:
- The processed video clip.
### `process_image_asset(self, asset: Dict[str, Any])`
- Processes an image asset based on the asset parameters and actions.
- Parameters:
- `asset`: The image asset to process.
- Returns:
- The processed image clip.
### `process_text_asset(self, asset: Dict[str, Any])`
- Processes a text asset based on the asset parameters and actions.
- Parameters:
- `asset`: The text asset to process.
- Returns:
- The processed text clip.
### `process_audio_asset(self, asset: Dict[str, Any])`
- Processes an audio asset based on the asset parameters and actions.
- Parameters:
- `asset`: The audio asset to process.
- Returns:
- The processed audio clip.
### `__normalize_image(self, clip)`
- Normalizes the image clip.
- Parameters:
- `clip`: The image clip to normalize.
- Returns:
- The normalized image clip.
### `__normalize_frame(self, frame)`
- Normalizes the given frame.
- Parameters:
- `frame`: The frame to normalize.
- Returns:
- The normalized frame.
================================================
FILE: shortGPT/editing_framework/__init__.py
================================================
================================================
FILE: shortGPT/editing_framework/core_editing_engine.py
================================================
from urllib.error import HTTPError
from shortGPT.config.path_utils import get_program_path
import os
from shortGPT.config.path_utils import handle_path
import numpy as np
from typing import Any, Dict, List, Union
from moviepy import (AudioFileClip, CompositeVideoClip, CompositeAudioClip, ImageClip,
TextClip, VideoFileClip, AudioClip)
from moviepy.Clip import Clip
from moviepy import vfx, afx
from shortGPT.editing_framework.rendering_logger import MoviepyProgressLogger
import json
def load_schema(json_path):
    """Load and return the JSON document stored at ``json_path`` (read as UTF-8)."""
    # The context manager closes the file; the previous one-liner never did.
    with open(json_path, 'r', encoding='utf-8') as schema_file:
        return json.load(schema_file)
class CoreEditingEngine:
def generate_image(self, schema:Dict[str, Any],output_file , logger=None):
    """Composite the schema's visual assets into one image file.

    Args:
        schema: Editing schema; ``schema['visual_assets']`` maps asset names
            to assets that carry a ``'type'`` (``'image'`` or ``'text'``) and
            a ``'z'`` sort key.
        output_file: Path the frame is saved to.
        logger: Accepted but not used by this method.

    Returns:
        ``output_file``.

    Raises:
        ValueError: If an asset has a type other than ``'image'`` or ``'text'``.
    """
    # Assets are layered in ascending 'z' order.
    assets = sorted(schema['visual_assets'].values(), key=lambda asset: asset['z'])
    clips = []
    for asset in assets:
        asset_type = asset['type']
        if asset_type == 'image':
            clip = self.process_image_asset(asset)
        elif asset_type == 'text':
            clip = self.process_text_asset(asset)
        else:
            raise ValueError(f'Invalid asset type: {asset_type}')
        # Exactly one layer per asset: the previous code had a second append
        # in the text branch.
        clips.append(clip)
    image = CompositeVideoClip(clips)
    image.save_frame(output_file)
    return output_file
def generate_video(self, schema:Dict[str, Any], output_file, logger=None, force_duration=None, threads=None) -> str:
    """Render the schema's visual and audio assets into a video file.

    Args:
        schema: Editing schema with ``'visual_assets'`` and ``'audio_assets'``
            mappings; every asset carries a ``'type'`` and a ``'z'`` sort key.
        output_file: Path the video is written to.
        logger: Optional callback, wrapped in a ``MoviepyProgressLogger``.
        force_duration: When set, overrides the duration of the final video.
        threads: Forwarded to ``write_videofile``.

    Returns:
        ``output_file`` (the previous ``-> None`` annotation was wrong).

    Raises:
        ValueError: If an asset has an unsupported type.
    """
    # Assets are layered in ascending 'z' order.
    visual_assets = sorted(schema['visual_assets'].values(), key=lambda asset: asset['z'])
    audio_assets = sorted(schema['audio_assets'].values(), key=lambda asset: asset['z'])

    visual_clips = []
    for asset in visual_assets:
        asset_type = asset['type']
        if asset_type == 'video':
            clip = self.process_video_asset(asset)
        elif asset_type == 'image':
            try:
                clip = self.process_image_asset(asset)
            except Exception as e:
                # Best effort: an image that fails to load is skipped.
                print(f"Failed to load image {asset['parameters']['url']}. Error : {str(e)}")
                continue
        elif asset_type == 'text':
            clip = self.process_text_asset(asset)
        else:
            raise ValueError(f'Invalid asset type: {asset_type}')
        visual_clips.append(clip)

    audio_clips = []
    for asset in audio_assets:
        asset_type = asset['type']
        if asset_type == "audio":
            audio_clip = self.process_audio_asset(asset)
        else:
            raise ValueError(f"Invalid asset type: {asset_type}")
        audio_clips.append(audio_clip)

    video = CompositeVideoClip(visual_clips)
    if audio_clips:
        audio = CompositeAudioClip(audio_clips)
        video = video.with_audio(audio)
        # The video lasts as long as the composed audio track.
        video = video.with_duration(audio.duration)
    if force_duration:
        video = video.with_duration(force_duration)

    write_kwargs = dict(threads=threads, codec='libx264', audio_codec='aac', fps=25, preset='veryfast')
    if logger:
        # Only override the writer's default logger when a callback is given.
        write_kwargs['logger'] = MoviepyProgressLogger(callBackFunction=logger)
    video.write_videofile(output_file, **write_kwargs)
    return output_file
def generate_audio(self, schema:Dict[str, Any], output_file, logger=None) -> str:
    """Mix the schema's audio assets into one audio file.

    Args:
        schema: Editing schema; ``schema['audio_assets']`` maps asset names to
            assets of type ``"audio"`` that carry a ``'z'`` sort key.
        output_file: Path the audio is written to.
        logger: Optional callback, wrapped in a ``MoviepyProgressLogger``.

    Returns:
        ``output_file`` (the previous ``-> None`` annotation was wrong).

    Raises:
        ValueError: If an asset has a type other than ``"audio"``.
    """
    audio_assets = sorted(schema['audio_assets'].values(), key=lambda asset: asset['z'])
    audio_clips = []
    for asset in audio_assets:
        asset_type = asset['type']
        if asset_type == "audio":
            audio_clip = self.process_audio_asset(asset)
        else:
            raise ValueError(f"Invalid asset type: {asset_type}")
        audio_clips.append(audio_clip)

    audio = CompositeAudioClip(audio_clips)
    audio.fps = 44100
    write_kwargs = {}
    if logger:
        # Only override the writer's default logger when a callback is given.
        write_kwargs['logger'] = MoviepyProgressLogger(callBackFunction=logger)
    audio.write_audiofile(output_file, **write_kwargs)
    return output_file
# Process common actions
def process_common_actions(self,
                           clip: Union[VideoFileClip, ImageClip, TextClip, AudioFileClip],
                           actions: List[Dict[str, Any]]) -> Union[VideoFileClip, AudioFileClip, ImageClip, TextClip]:
    """Apply the timing actions shared by all clip kinds, in list order.

    Handled types are ``'set_time_start'`` and ``'set_time_end'`` (``'param'``
    is passed as the single argument) and ``'subclip'`` (``'param'`` is
    expanded as keyword arguments). Other types are ignored here.

    Returns:
        The clip after all matching actions were applied.
    """
    for step in actions:
        kind = step['type']
        if kind == 'set_time_start':
            clip = clip.with_start(step['param'])
        elif kind == 'set_time_end':
            clip = clip.with_end(step['param'])
        elif kind == 'subclip':
            clip = clip.subclipped(**step['param'])
    return clip
# Process common visual clip actions
def process_common_visual_actions(self,
                                  clip: Clip,
                                  actions: List[Dict[str, Any]]) -> Union[VideoFileClip, ImageClip, TextClip]:
    """Apply the timing actions, then every visual action, to ``clip``.

    The shared timing actions are applied first via
    ``process_common_actions``; the visual actions then run in list order.
    Unknown action types are ignored.

    Args:
        clip: Visual clip to transform.
        actions: Action dicts, each with a ``'type'`` and a ``'param'`` entry.

    Returns:
        The transformed clip.
    """
    clip = self.process_common_actions(clip, actions)
    for action in actions:
        action_type = action['type']
        if action_type == 'resize':
            clip = clip.with_effects([vfx.Resize(**action['param'])])
        elif action_type == 'crop':
            clip = clip.with_effects([vfx.Crop(**action['param'])])
        elif action_type == 'screen_position':
            clip = clip.with_position(**action['param'])
        elif action_type == 'green_screen':
            # Missing or falsy settings fall back to the defaults. The
            # previous direct indexing raised KeyError for an absent key,
            # which made the defaults unreachable in that case.
            params = action['param'] or {}
            color = params.get('color') or [52, 255, 20]
            thr = params.get('threshold') or 100
            s = params.get('stiffness') or 5
            clip = clip.with_effects([vfx.MaskColor(color=color, threshold=thr, stiffness=s)])
        elif action_type == 'normalize_image':
            clip = clip.image_transform(self.__normalize_frame)
        elif action_type == 'auto_resize_image':
            # Fit inside the box while keeping the aspect ratio: portrait
            # clips are bound by height, the rest by width.
            ar = clip.aspect_ratio
            height = action['param']['maxHeight']
            width = action['param']['maxWidth']
            if ar < 1:
                clip = clip.with_effects([vfx.Resize((height * ar, height))])
            else:
                clip = clip.with_effects([vfx.Resize((width, width / ar))])
    return clip
# Process audio actions
def process_audio_actions(self, clip: AudioClip,
                          actions: List[Dict[str, Any]]) -> AudioClip:
    """Apply the timing actions, then every audio action, to ``clip``.

    The shared timing actions are applied first via
    ``process_common_actions``; the audio actions then run in list order.
    Unknown action types are ignored.

    Args:
        clip: Audio clip to transform.
        actions: Action dicts, each with a ``'type'`` and a ``'param'`` entry.

    Returns:
        The transformed audio clip.
    """
    clip = self.process_common_actions(clip, actions)
    for action in actions:
        action_type = action['type']
        # The background_music.json editing step emits 'normalize_audio',
        # while this method historically only matched 'normalize_music', so
        # the normalization step was silently skipped. Accept both names.
        if action_type in ('normalize_music', 'normalize_audio'):
            clip = clip.with_effects([afx.AudioNormalize()])
        elif action_type == 'loop_background_music':
            target_duration = action['param']
            # Skip the first 15% of the track before looping it.
            start = clip.duration * 0.15
            clip = clip.subclipped(start)
            clip = clip.with_effects([afx.AudioLoop(duration=target_duration)])
        elif action_type == 'volume_percentage':
            clip = clip.with_effects([afx.MultiplyVolume(action['param'])])
    return clip
# Process individual asset types
def process_video_asset(self, asset: Dict[str, Any]) -> VideoFileClip:
    """Open the video described by ``asset`` and apply its visual actions.

    The ``'url'`` parameter is passed through ``handle_path`` and used as the
    file name; an ``'audio'`` parameter, when present, is forwarded to
    ``VideoFileClip`` as well.
    """
    asset_params = asset['parameters']
    clip_kwargs = {'filename': handle_path(asset_params['url'])}
    if 'audio' in asset_params:
        clip_kwargs['audio'] = asset_params['audio']
    video_clip = VideoFileClip(**clip_kwargs)
    return self.process_common_visual_actions(video_clip, asset['actions'])
def process_image_asset(self, asset: Dict[str, Any]) -> ImageClip:
    """Load the image at the asset's ``'url'`` and apply its visual actions."""
    image_source = asset['parameters']['url']
    return self.process_common_visual_actions(ImageClip(image_source), asset['actions'])
def process_text_asset(self, asset: Dict[str, Any]) -> TextClip:
    """Build a ``TextClip`` from the asset's parameters and apply its actions.

    Args:
        asset: Asset dict with ``'parameters'`` (must contain ``'text'`` and
            at least one of ``'font_size'`` / ``'size'``) and ``'actions'``.

    Returns:
        The text clip after the common visual actions have been applied.

    Raises:
        ValueError: If neither a font size nor a size is supplied.
        KeyError: If ``'text'`` is missing from the parameters.
    """
    text_clip_params = asset['parameters']
    # The old check listed 'text' and the stale key 'fontsize', so it passed
    # for any asset that had text. Enforce what the message actually says,
    # and keep honouring the legacy 'fontsize' spelling as an alias.
    font_size = text_clip_params.get('font_size')
    if font_size is None:
        font_size = text_clip_params.get('fontsize')
    if font_size is None and text_clip_params.get('size') is None:
        raise ValueError('You must include at least a size or a fontsize to determine the size of your text')
    clip_info = {
        'text': text_clip_params['text'],
        'font': text_clip_params.get('font'),
        'font_size': font_size,
        'color': text_clip_params.get('color'),
        'stroke_width': text_clip_params.get('stroke_width'),
        'stroke_color': text_clip_params.get('stroke_color'),
        'size': text_clip_params.get('size'),
        'method': text_clip_params.get('method', 'label'),
        'text_align': text_clip_params.get('text_align', 'center'),
    }
    # Drop unset options so TextClip falls back to its own defaults.
    clip_info = {key: value for key, value in clip_info.items() if value is not None}
    clip = TextClip(**clip_info)
    return self.process_common_visual_actions(clip, asset['actions'])
def process_audio_asset(self, asset: Dict[str, Any]) -> AudioFileClip:
    """Open the audio file at the asset's ``'url'`` and apply its audio actions."""
    audio_source = asset['parameters']['url']
    return self.process_audio_actions(AudioFileClip(audio_source), asset['actions'])
def __normalize_image(self, clip):
    """Return ``clip`` with every frame replaced by one normalized frame.

    The first frame requested is normalized with ``__normalize_frame`` and
    cached; every later request returns that same cached frame regardless of
    ``t``.
    """
    cached = []

    def f(get_frame, t):
        if not cached:
            cached.append(self.__normalize_frame(get_frame(t)))
        return cached[0]

    # The rest of this engine uses the MoviePy 2 API, where ``Clip.fl`` was
    # renamed to ``Clip.transform``.
    return clip.transform(f)
def __normalize_frame(self, frame):
    """Expand a 2-D (greyscale) frame to three identical channels.

    A frame with exactly two dimensions becomes a float64 array of shape
    ``(height, width, 3)`` whose channels all equal the input. Any other
    frame is returned unchanged.
    """
    if np.ndim(frame) != 2:
        return frame
    grey = np.asarray(frame, dtype=np.float64)
    # Broadcast the single channel instead of filling pixel by pixel in
    # Python; the result is identical but computed in native code.
    return np.repeat(grey[:, :, np.newaxis], 3, axis=2)
================================================
FILE: shortGPT/editing_framework/editing_engine.py
================================================
import json
from typing import Any, Dict, List, Union
from enum import Enum
import collections.abc
from shortGPT.editing_framework.core_editing_engine import CoreEditingEngine
def update_dict(d, u):
    """Recursively merge mapping ``u`` into dict ``d`` in place.

    Nested mappings in ``u`` are merged into the corresponding nested dict
    of ``d``; every other value overwrites the entry in ``d``.

    Args:
        d: Dict to update (mutated).
        u: Mapping holding the updates.

    Returns:
        ``d`` itself, after the merge.
    """
    for key, value in u.items():
        if isinstance(value, collections.abc.Mapping):
            existing = d.get(key)
            # A placeholder such as None (JSON null) cannot be merged into;
            # start from an empty dict instead of crashing on it.
            if not isinstance(existing, collections.abc.MutableMapping):
                existing = {}
            d[key] = update_dict(existing, value)
        else:
            d[key] = value
    return d
class EditingStep(Enum):
    # Each value is the file name of a JSON template that
    # EditingEngine.addEditingStep loads from STEPS_PATH.
    CROP_1920x1080 = "crop_1920x1080_to_short.json"
    # Caption variants.
    ADD_CAPTION_SHORT = "make_caption.json"
    ADD_CAPTION_SHORT_ARABIC = "make_caption_arabic.json"
    ADD_CAPTION_LANDSCAPE = "make_caption_landscape.json"
    ADD_CAPTION_LANDSCAPE_ARABIC = "make_caption_arabic_landscape.json"
    # Overlays.
    ADD_WATERMARK = "show_watermark.json"
    ADD_SUBSCRIBE_ANIMATION = "subscribe_animation.json"
    SHOW_IMAGE = "show_top_image.json"
    # Audio and background steps.
    ADD_VOICEOVER_AUDIO = "add_voiceover.json"
    ADD_BACKGROUND_MUSIC = "background_music.json"
    ADD_REDDIT_IMAGE = "show_reddit_image.json"
    ADD_BACKGROUND_VIDEO = "add_background_video.json"
    INSERT_AUDIO = "insert_audio.json"
    EXTRACT_AUDIO = "extract_audio.json"
    ADD_BACKGROUND_VOICEOVER = "add_background_voiceover.json"
class Flow(Enum):
    # Each value is the file name of a JSON flow that
    # EditingEngine.ingestFlow loads from FLOWS_PATH.
    WHITE_REDDIT_IMAGE_FLOW = "build_reddit_image.json"
# Locate the JSON template directories relative to this module's own file
# rather than the current working directory.
from pathlib import Path
_here = Path(__file__).parent
STEPS_PATH = (_here / 'editing_steps/').resolve()  # read by addEditingStep
FLOWS_PATH = (_here / 'flows/').resolve()  # read by ingestFlow
class EditingEngine:
    """Accumulates an editing schema and renders it through ``CoreEditingEngine``.

    The schema has two buckets, ``'visual_assets'`` and ``'audio_assets'``.
    Editing steps are loaded from JSON templates under ``STEPS_PATH`` and
    whole flows from ``FLOWS_PATH``.
    """

    def __init__(self,):
        # Per-step counter, used to give each added asset a unique key.
        self.editing_step_tracker = {step: 0 for step in EditingStep}
        self.schema = {'visual_assets': {}, 'audio_assets': {}}

    def addEditingStep(self, editingStep: EditingStep, args: Union[Dict[str, Any], None] = None):
        """Load an editing-step template, fill in ``args`` and add it to the schema.

        Args:
            editingStep: Which template to load.
            args: Values for the template's declared inputs, keyed by
                parameter name or action type. Defaults to no arguments.

        Raises:
            Exception: If an input declared by the template is missing from
                ``args``.
        """
        args = {} if args is None else args
        # Use a context manager so the template file handle is always closed.
        with open(STEPS_PATH / f"{editingStep.value}", 'r', encoding='utf-8') as step_file:
            json_step = json.load(step_file)
        step_name, editingStepDict = list(json_step.items())[0]

        if 'inputs' in editingStepDict:
            inputs = editingStepDict['inputs']
            required_args = inputs.get('actions', []) + inputs.get('parameters', [])
            for required_argument in required_args:
                if required_argument not in args:
                    raise Exception(
                        f"Error. '{required_argument}' input missing, you must include it to use this editing step")

            action_names = [action['type'] for action in editingStepDict.get('actions', [])]
            param_names = list(editingStepDict.get('parameters', []))
            for arg_name, arg_value in args.items():
                if 'parameters' in inputs and arg_name in param_names:
                    editingStepDict['parameters'][arg_name] = arg_value
                # Checked independently of the branch above: an input listed
                # under "parameters" may really be the name of an action.
                if 'actions' in inputs and arg_name in action_names:
                    for action in editingStepDict['actions']:
                        if action['type'] == arg_name:
                            action['param'] = arg_value

        bucket = 'audio_assets' if editingStepDict['type'] == 'audio' else 'visual_assets'
        asset_key = f"{step_name}_{self.editing_step_tracker[editingStep]}"
        self.schema[bucket][asset_key] = editingStepDict
        self.editing_step_tracker[editingStep] += 1

    def ingestFlow(self, flow: Flow, args):
        """Load a flow template, inject ``args`` and make it the current schema.

        Each entry of the flow's ``'inputs'`` maps an argument name to a
        ``/``-separated path inside the flow where the value is written.

        Raises:
            Exception: If an input declared by the flow is missing from
                ``args``.
        """
        with open(FLOWS_PATH / f"{flow.value}", 'r', encoding='utf-8') as flow_file:
            json_flow = json.load(flow_file)
        for required_argument in list(json_flow['inputs'].keys()):
            if required_argument not in args:
                raise Exception(
                    f"Error. '{required_argument}' input missing, you must include it to use this editing step")
            # Wrap the value in nested dicts following the path, innermost
            # key first, then merge that structure into the flow.
            update = args[required_argument]
            for path_key in reversed(json_flow['inputs'][required_argument].split("/")):
                update = {path_key: update}
            json_flow = update_dict(json_flow, update)
        self.schema = json_flow

    def dumpEditingSchema(self):
        """Return the current schema dict (the live object, not a copy)."""
        return self.schema

    def renderVideo(self, outputPath, logger=None):
        """Render the schema to a video file at ``outputPath``."""
        engine = CoreEditingEngine()
        engine.generate_video(self.schema, outputPath, logger=logger)

    def renderImage(self, outputPath, logger=None):
        """Render the schema to an image file at ``outputPath``."""
        engine = CoreEditingEngine()
        engine.generate_image(self.schema, outputPath, logger=logger)

    def generateAudio(self, outputPath, logger=None):
        """Render the schema's audio assets to a file at ``outputPath``."""
        engine = CoreEditingEngine()
        engine.generate_audio(self.schema, outputPath, logger=logger)
# import json
# from typing import Any, Dict, List, Union
# from enum import Enum
# import collections.abc
# import os
# from shortGPT.editing_framework.core_editing_engine import CoreEditingEngine
# def update_dict(d, u):
# for k, v in u.items():
# if isinstance(v, collections.abc.Mapping):
# d[k] = update_dict(d.get(k, {}), v)
# else:
# d[k] = v
# return d
# class EditingStep(Enum):
# CROP_1920x1080 = "crop_1920x1080_to_short.json"
# ADD_CAPTION_SHORT = "make_caption.json"
# ADD_CAPTION_SHORT_ARABIC = "make_caption_arabic.json"
# ADD_CAPTION_LANDSCAPE = "make_caption_landscape.json"
# ADD_CAPTION_LANDSCAPE_ARABIC = "make_caption_arabic_landscape.json"
# ADD_WATERMARK = "show_watermark.json"
# ADD_SUBSCRIBE_ANIMATION = "subscribe_animation.json"
# SHOW_IMAGE = "show_top_image.json"
# ADD_VOICEOVER_AUDIO = "add_voiceover.json"
# ADD_BACKGROUND_MUSIC = "background_music.json"
# ADD_REDDIT_IMAGE = "show_reddit_image.json"
# ADD_BACKGROUND_VIDEO = "add_background_video.json"
# INSERT_AUDIO = "insert_audio.json"
# EXTRACT_AUDIO = "extract_audio.json"
# ADD_BACKGROUND_VOICEOVER = "add_background_voiceover.json"
# class Flow(Enum):
# WHITE_REDDIT_IMAGE_FLOW = "build_reddit_image.json"
# STEPS_PATH = "shortGPT/editing_framework/editing_steps/"
# FLOWS_PATH = "shortGPT/editing_framework/flows/"
# class EditingTrack:
# def __init__(self, filepath=None):
# self.editing_step_tracker = dict((step, 0) for step in EditingStep)
# self.schema = {'visual_assets': {}, 'audio_assets': {}}
# self.filepath = filepath
# if filepath is not None:
# try:
# self.load_from_file(filepath)
# except FileNotFoundError:
# self.save_to_file(filepath)
# def addEditingStep(self, editingStep: EditingStep, args: Dict[str, any] = {}):
# json_step = json.loads(
# open(STEPS_PATH+editingStep.value, 'r', encoding='utf-8').read())
# step_name, editingStepDict = list(json_step.items())[0]
# if 'inputs' in editingStepDict:
# required_args = (editingStepDict['inputs']['actions'] if 'actions' in editingStepDict['inputs'] else []) + (editingStepDict['inputs']['parameters'] if 'parameters' in editingStepDict['inputs'] else [])
# for required_argument in required_args:
# if required_argument not in args:
# raise Exception(
# f"Error. '{required_argument}' input missing, you must include it to use this editing step")
# if required_args:
# pass
# action_names = [action['type'] for action in editingStepDict['actions']
# ] if 'actions' in editingStepDict else []
# param_names = [param_name for param_name in editingStepDict['parameters']
# ] if 'parameters' in editingStepDict else []
# for arg_name in args:
# if ('inputs' in editingStepDict):
# if 'parameters' in editingStepDict['inputs'] and arg_name in param_names:
# editingStepDict['parameters'][arg_name] = args[arg_name]
# pass
# if 'actions' in editingStepDict['inputs'] and arg_name in action_names:
# for i, action in enumerate(editingStepDict['actions']):
# if action['type'] == arg_name:
# editingStepDict['actions'][i]['param'] = args[arg_name]
# if editingStepDict['type'] == 'audio':
# self.schema['audio_assets'][f"{step_name}_{self.editing_step_tracker[editingStep]}"] = editingStepDict
# else:
# self.schema['visual_assets'][f"{step_name}_{self.editing_step_tracker[editingStep]}"] = editingStepDict
# self.editing_step_tracker[editingStep] += 1
# def ingestFlow(self, flow: Flow, args):
# json_flow = json.loads(open(FLOWS_PATH+flow.value, 'r', encoding='utf-8').read())
# for required_argument in list(json_flow['inputs'].keys()):
# if required_argument not in args:
# raise Exception(
# f"Error. '{required_argument}' input missing, you must include it to use this editing step")
# update = args[required_argument]
# for path_key in reversed(json_flow['inputs'][required_argument].split("/")):
# update = {path_key: update}
# json_flow = update_dict(json_flow, update)
# self.schema = json_flow
# def dumpEditingSchema(self):
# return self.schema
# def save_to_file(self):
# if self.file_path:
# with open(self.file_path, 'w') as f:
# json.dump({'step_tracker': {key.name: value for key, value in self.step_tracker.items()}, 'asset_schema': self.asset_schema}, f)
# def load_from_file(self):
# if self.file_path and os.path.exists(self.file_path):
# with open(self.file_path, 'r') as f:
# data = json.load(f)
# self.step_tracker = {EditingStep[key]: value for key, value in data.get('step_tracker', {}).items()}
# self.asset_schema = data.get('asset_schema', {'visual_assets': {}, 'audio_assets': {}})
# else:
# raise Exception("File does not exist")
# def renderVideo(self, outputPath, logger=None):
# engine = CoreEditingEngine()
# engine.generate_video(self.schema, outputPath, logger=logger)
# def renderImage(self, outputPath, logger=None):
# engine = CoreEditingEngine()
# engine.generate_image(self.schema, outputPath, logger=logger)
# def generateAudio(self, outputPath, logger=None):
# engine = CoreEditingEngine()
# engine.generate_audio(self.schema, outputPath, logger=logger)
================================================
FILE: shortGPT/editing_framework/editing_steps/__init__.py
================================================
================================================
FILE: shortGPT/editing_framework/editing_steps/add_background_video.json
================================================
{
"background_video": {
"type": "video",
"z": 0,
"inputs":{
"parameters": ["url"],
"actions": ["set_time_start", "set_time_end"]
},
"parameters": {
"url": null,
"audio": false
},
"actions": [
{
"type": "set_time_start",
"param": null
},
{
"type": "set_time_end",
"param": null
}
]
}
}
================================================
FILE: shortGPT/editing_framework/editing_steps/add_background_voiceover.json
================================================
{
"background_voiceover": {
"inputs": {
"parameters": ["url"],
"actions": ["volume_percentage"]
},
"type": "audio",
"z": -1,
"parameters": {
"url": null
},
"actions": [
{
"type": "volume_percentage",
"param": null
}
]
}
}
================================================
FILE: shortGPT/editing_framework/editing_steps/add_voiceover.json
================================================
{
"voiceover": {
"inputs": {
"parameters": [
"url"
]
},
"type": "audio",
"z": -1,
"parameters": {
"url": null
},
"actions": [
]
}
}
================================================
FILE: shortGPT/editing_framework/editing_steps/background_music.json
================================================
{
"background_music": {
"inputs": {
"parameters": ["url", "volume_percentage"],
"actions":["loop_background_music"]
},
"type": "audio",
"z": -1,
"parameters": {
"url": null
},
"actions": [
{
"type": "loop_background_music",
"param": {
"duration": null
}
},
{
"type":"normalize_audio",
"param":{}
},
{
"type": "volume_percentage",
"param": null
}
]
}
}
================================================
FILE: shortGPT/editing_framework/editing_steps/crop_1920x1080_to_short.json
================================================
{
"background_video": {
"type": "video",
"z": 0,
"inputs":{
"parameters": ["url"]
},
"parameters": {
"url": null,
"audio": false
},
"actions": [
{
"type": "crop",
"param": {
"x1": 420,
"y1": 0,
"width": 1080,
"height": 1080
}
},
{
"type": "resize",
"param": {
"width": 1920,
"height": 1920
}
},
{
"type": "crop",
"param": {
"x1": 420,
"y1": 0,
"width": 1080,
"height": 1920
}
}
]
}
}
================================================
FILE: shortGPT/editing_framework/editing_steps/extract_audio.json
================================================
{
"extract_audio": {
"inputs": {
"parameters": ["url"],
"actions": ["subclip", "set_time_start", "set_time_end"]
},
"type": "audio",
"z": -2,
"parameters": {
"url": null
},
"actions": [
{
"type": "subclip",
"param": null
},
{
"type": "set_time_start",
"param": null
},
{
"type": "set_time_end",
"param": null
}
]
}
}
================================================
FILE: shortGPT/editing_framework/editing_steps/insert_audio.json
================================================
{
"insert_audio": {
"inputs": {
"parameters": ["url"],
"actions": ["set_time_start", "set_time_end"]
},
"type": "audio",
"z": -1,
"parameters": {
"url": null
},
"actions": [
{
"type":"set_time_start",
"param":null
},
{
"type": "set_time_end",
"param": null
}
]
}
}
================================================
FILE: shortGPT/editing_framework/editing_steps/make_caption.json
================================================
{
"caption": {
"type": "text",
"z": 4,
"inputs":{
"parameters": ["text"],
"actions": ["set_time_start", "set_time_end"]
},
"parameters": {
"text": null,
"font_size": 100,
"font": "fonts/LuckiestGuy-Regular.ttf",
"color": "white",
"stroke_width": 3,
"stroke_color": "black",
"method": "caption",
"size":[900, 450]
},
"actions": [
{
"type": "set_time_start",
"param": null
},
{
"type": "set_time_end",
"param": null
},
{
"type": "screen_position",
"param": {
"pos": "center"
}
}
]
}
}
================================================
FILE: shortGPT/editing_framework/editing_steps/make_caption_arabic.json
================================================
{
"caption": {
"type": "text",
"z": 4,
"inputs":{
"parameters": ["text"],
"actions": ["set_time_start", "set_time_end"]
},
"parameters": {
"text": null,
"font_size": 100,
"font": "fonts/LuckiestGuy-Regular.ttf",
"color": "white",
"stroke_width": 2,
"stroke_color": "black",
"method": "caption",
"size":[900, 450]
},
"actions": [
{
"type": "set_time_start",
"param": null
},
{
"type": "set_time_end",
"param": null
},
{
"type": "screen_position",
"param": {
"pos": "center"
}
}
]
}
}
================================================
FILE: shortGPT/editing_framework/editing_steps/make_caption_arabic_landscape.json
================================================
{
"caption": {
"type": "text",
"z": 4,
"inputs":{
"parameters": ["text"],
"actions": ["set_time_start", "set_time_end"]
},
"parameters": {
"text": null,
"font_size": 100,
"font": "fonts/LuckiestGuy-Regular.ttf",
"color": "white",
"stroke_width": 2,
"stroke_color": "black"
},
"actions": [
{
"type": "set_time_start",
"param": null
},
{
"type": "set_time_end",
"param": null
},
{
"type": "screen_position",
"param": {
"pos": ["center", 800]
}
}
]
}
}
================================================
FILE: shortGPT/editing_framework/editing_steps/make_caption_landscape.json
================================================
{
"caption": {
"type": "text",
"z": 4,
"inputs":{
"parameters": ["text"],
"actions": ["set_time_start", "set_time_end"]
},
"parameters": {
"text": null,
"font_size": 100,
"font": "fonts/LuckiestGuy-Regular.ttf",
"color": "white",
"stroke_width": 3,
"stroke_color": "black",
"method": "label"
},
"actions": [
{
"type": "set_time_start",
"param": null
},
{
"type": "set_time_end",
"param": null
},
{
"type": "screen_position",
"param": {
"pos": ["center", 820]
}
}
]
}
}
================================================
FILE: shortGPT/editing_framework/editing_steps/show_reddit_image.json
================================================
{
"reddit_image": {
"type": "image",
"inputs":{
"parameters": ["url"]
},
"z": 5,
"parameters": {
"url": null
},
"actions": [
{
"type": "set_time_start",
"param": 0
},
{
"type": "set_time_end",
"param": 3.5
},
{
"type": "screen_position",
"param": {
"pos": [
"center","center"
]
}
}
]
}
}
================================================
FILE: shortGPT/editing_framework/editing_steps/show_top_image.json
================================================
{
"top_image_1": {
"type": "image",
"inputs":{
"parameters": ["url"],
"actions": ["set_time_start", "set_time_end"]
},
"z": 5,
"parameters": {
"url": null
},
"actions": [
{
"type": "set_time_start",
"param": null
},
{
"type": "set_time_end",
"param": null
},
{
"type": "auto_resize_image",
"param":{
"maxWidth": 690,
"maxHeight": 690
}
},
{
"type": "normalize_image",
"param":{
"maxWidth": 690,
"maxHeight": 690
}
},
{
"type": "screen_position",
"param": {
"pos": [
"center",
50
]
}
}
]
}
}
================================================
FILE: shortGPT/editing_framework/editing_steps/show_watermark.json
================================================
{
"short_watermark": {
"inputs":{
"parameters": ["text"]
},
"type": "text",
"z": 3,
"parameters": {
"text": null,
"font_size": 100,
"font": "fonts/LuckiestGuy-Regular.ttf",
"color": "white",
"stroke_width": 1,
"stroke_color": "black",
"method": "caption",
"size":[900, 450]
},
"actions": [
{
"type": "screen_position",
"param": {
"pos": [
"center",
0.7
],
"relative": true
}
}
]
}
}
================================================
FILE: shortGPT/editing_framework/editing_steps/subscribe_animation.json
================================================
{
"subscribe_animation": {
"type": "video",
"z": 6,
"inputs":{
"parameters": ["url"]
},
"parameters": {
"url": null,
"audio": false
},
"actions": [
{
"type": "set_time_start",
"param": 3.5
},
{
"type": "resize",
"param": {
"new_size": 0.4
}
},
{
"type": "green_screen",
"param": {
"color": [
52,
255,
20
],
"threshold": 100,
"stiffness": 5
}
},
{
"type": "screen_position",
"param": {
"pos": ["center",
1160]
}
}
]
}
}
================================================
FILE: shortGPT/editing_framework/flows/__init__.py
================================================
================================================
FILE: shortGPT/editing_framework/flows/build_reddit_image.json
================================================
{
"inputs":{
"username_text": "visual_assets/username_txt/parameters/text",
"ncomments_text": "visual_assets/ncomments_txt/parameters/text",
"nupvote_text": "visual_assets/nupvote_txt/parameters/text",
"question_text": "visual_assets/question_txt/parameters/text"
},
"visual_assets":{
"white_reddit_template_image": {
"type": "image",
"z": 0,
"parameters": {
"url": "public/white_reddit_template.png"
},
"actions": [
]
},
"username_txt": {
"type": "text",
"z": 1,
"parameters": {
"text": null,
"font_size": 32,
"font" : "fonts/Roboto-Bold.ttf",
"color": "rgb(129, 131, 132)"
},
"actions": [
{
"type": "screen_position",
"param": {
"pos":[350, 43],
"relative": false
}
}
]
},
"ncomments_txt":{
"type": "text",
"z": 1,
"parameters": {
"text": null,
"font_size": 34,
"font" : "fonts/Roboto-Bold.ttf",
"color": "rgb(129, 131, 132)"
},
"actions": [
{
"type": "screen_position",
"param": {
"pos":[222, 301],
"relative": false
}
}
]
},
"nupvote_txt":{
"type": "text",
"z": 1,
"parameters": {
"text": null,
"font_size": 36,
"font" : "fonts/Roboto-Bold.ttf",
"color": "rgb(26, 26 , 27)"
},
"actions": [
{
"type": "screen_position",
"param": {
"pos":[28, 115],
"relative": false
}
}
]
},
"question_txt": {
"type": "text",
"z": 1,
"parameters": {
"text": null,
"font_size": 40,
"font" : "fonts/Roboto-Bold.ttf",
"color": "rgb(26, 26, 27)",
"method": "label",
"text_align": "left"
},
"actions": [
{
"type": "screen_position",
"param": {
"pos":[150, 110],
"relative": false
}
}
]
}
}
}
================================================
FILE: shortGPT/editing_framework/rendering_logger.py
================================================
from proglog import ProgressBarLogger
import time
class MoviepyProgressLogger(ProgressBarLogger):
    """Progress logger that reports rendering progress as a text line.

    Each progress update is formatted as a string and either passed to the
    supplied callback or printed.
    """

    def __init__(self, callBackFunction = None):
        super().__init__()
        self.callBackFunction = callBackFunction
        # Reference point for the elapsed / remaining time estimates.
        self.start_time = time.time()

    def bars_callback(self, bar, attr, value, old_value=None):
        """Report progress each time a bar's index changes.

        Updates to other bar attributes, and updates that arrive before a
        non-zero total is known, are ignored: they cannot be turned into a
        percentage and previously caused a division error or a misleading
        "total/total" line.
        """
        total = self.bars[bar]['total']
        if attr != 'index' or not total:
            return
        percentage = (value / total) * 100
        elapsed_time = time.time() - self.start_time
        estimated_time = (elapsed_time / percentage) * (100 - percentage) if percentage != 0 else 0
        progress_string = f'Rendering progress : {value}/{total} | Time spent: {self.format_time(elapsed_time)} | Time left: {self.format_time(estimated_time)}'
        if (self.callBackFunction):
            self.callBackFunction(progress_string)
        else:
            print(progress_string)

    def format_time(self, seconds):
        """Format a duration in seconds as ``'<minutes>m <seconds>s'``."""
        minutes, seconds = divmod(seconds, 60)
        return f'{int(minutes)}m {int(seconds)}s'
================================================
FILE: shortGPT/editing_utils/README.md
================================================
# Module: editing_utils
The `editing_utils` module provides utility functions for editing videos and images. It consists of three files: `editing_images.py`, `captions.py`, and `handle_videos.py`.
## File: editing_images.py
This file contains functions related to editing images.
### Function: getImageUrlsTimed(imageTextPairs)
This function takes a list of image-text pairs and returns a list of tuples containing the image URL and the corresponding text. It uses the `searchImageUrlsFromQuery` function to search for image URLs based on the provided text.
### Function: searchImageUrlsFromQuery(query, top=3, expected_dim=[720,720], retries=5)
This function searches for image URLs based on a given query. It uses the `getBingImages` function from the `shortGPT.api_utils.image_api` module to fetch the images. The `top` parameter specifies the number of images to fetch (default is 3), and the `expected_dim` parameter specifies the expected dimensions of the images (default is [720,720]). If no images are found, the function returns None. Otherwise, it selects the images with the closest dimensions to the expected dimensions and returns the URL of the first image.
## File: captions.py
This file contains functions related to handling captions.
### Function: interpolateTimeFromDict(word_position, d)
This function looks up the timestamp for a word position. The dictionary maps `(start, end)` position ranges to timestamps. Given a word position, the function returns the timestamp of the first range that contains it (both bounds inclusive), or `None` if no range matches.
### Function: cleanWord(word)
This function cleans a word by removing every character that is not a word character (letter, digit or underscore), whitespace, a hyphen, or a single or double quote.
### Function: getTimestampMapping(whisper_analysis)
This function extracts the mapping of word positions to timestamps from a Whisper analysis. The `whisper_analysis` parameter is a dictionary containing the analysis results. The function returns a dictionary with word positions as keys and corresponding timestamps as values.
### Function: splitWordsBySize(words, maxCaptionSize)
This function splits a list of words into captions based on a maximum caption size. The `maxCaptionSize` parameter specifies the maximum number of characters allowed in a caption; it has no default value and must be supplied. The function returns a list of captions.
### Function: getCaptionsWithTime(transcriptions, maxCaptionSize=15, considerPunctuation=True)
This function generates captions with their corresponding timestamps from a transcription. The `transcriptions` parameter is a dictionary whose `segments` carry word-level timing; segments without a `words` entry are ignored. The `maxCaptionSize` parameter specifies the maximum number of characters allowed in a caption (default is 15). A new caption is also started once the current one holds five words and, when `considerPunctuation` is true (the default), when the function reaches a word ending in `.`, `,`, `!` or `?`. It returns a list of `((start_time, end_time), caption_text)` pairs.
## File: handle_videos.py
This file contains functions related to handling videos.
### Function: getYoutubeAudio(url)
This function retrieves the audio URL and duration from a YouTube video. The `url` parameter specifies the URL of the YouTube video. The function uses the `yt_dlp` library to extract the audio information. It returns the audio URL and duration as a tuple. If the retrieval fails, it returns None.
### Function: getYoutubeVideoLink(url)
This function retrieves the video URL and duration from a YouTube video. The `url` parameter specifies the URL of the YouTube video. The function uses the `yt_dlp` library to extract the video information. It returns the video URL and duration as a tuple. If the retrieval fails, it raises an exception that names the URL.
### Function: extract_random_clip_from_video(video_url, video_duration, clip_duration, output_file)
This function extracts a random clip from a video and saves it to an output file. The `video_url` parameter specifies the URL of the video, the `video_duration` parameter specifies the duration of the video, the `clip_duration` parameter specifies the duration of the desired clip, and the `output_file` parameter specifies the file path for the extracted clip. The function uses the `ffmpeg` library to perform the extraction. It randomly selects a start time within 15% to 85% of the video duration and extracts a clip of the specified duration starting from the selected start time. If the extraction fails or the output file is not created, an exception is raised.
================================================
FILE: shortGPT/editing_utils/__init__.py
================================================
from . import editing_images
from . import captions
================================================
FILE: shortGPT/editing_utils/captions.py
================================================
import re
def getSpeechBlocks(whispered, silence_time=0.8):
    """Group transcription segments into blocks separated by silence.

    Consecutive segments are merged while the gap between the running block
    end and the next segment start does not exceed ``silence_time``.

    Returns:
        A list of ``[[start, end], text]`` entries.
    """
    blocks = []
    block_start, block_end, block_text = 0, 0, ""
    for segment in whispered['segments']:
        gap = segment['start'] - block_end
        if gap <= silence_time:
            # Still within the same stretch of speech: extend the block.
            block_end = segment['end']
            block_text = block_text + segment['text']
            continue
        if block_text:
            blocks.append([[block_start, block_end], block_text])
        block_start = segment['start']
        block_end = segment['end']
        block_text = segment['text']
    # Flush whatever is left after the final segment.
    if block_text:
        blocks.append([[block_start, block_end], block_text])
    return blocks
def cleanWord(word):
    """Strip characters other than word characters, whitespace, hyphens and quotes."""
    disallowed = r'[^\w\s\-_"\'\']'
    cleaned = re.sub(disallowed, '', word)
    return cleaned
def interpolateTimeFromDict(word_position, d):
    """Return the value of the first range in ``d`` containing ``word_position``.

    Keys are indexable pairs whose first two items are the inclusive lower
    and upper bounds. Returns ``None`` when no range matches.
    """
    matches = (
        timestamp
        for bounds, timestamp in d.items()
        if bounds[0] <= word_position <= bounds[1]
    )
    return next(matches, None)
def getTimestampMapping(whisper_analysis):
    """Map character-position ranges to the end time of the word they cover.

    Words are laid out one after another, each taking its text length plus
    one position. The key is the ``(start, end)`` range of a word and the
    value is that word's ``'end'`` time.
    """
    mapping = {}
    cursor = 0
    every_word = (
        word
        for segment in whisper_analysis['segments']
        for word in segment['words']
    )
    for word in every_word:
        span_end = cursor + len(word['text']) + 1
        mapping[(cursor, span_end)] = word['end']
        cursor = span_end
    return mapping
def splitWordsBySize(words, maxCaptionSize):
    """Pack ``words`` into captions no longer than ``maxCaptionSize`` characters.

    A caption is closed early once it reaches half of ``maxCaptionSize``
    while more words remain. A single word longer than the limit becomes a
    caption on its own.
    """
    half_size = maxCaptionSize / 2
    result = []
    position = 0
    total = len(words) if words else 0
    while position < total:
        line = words[position]
        position += 1
        while position < total:
            candidate = line + ' ' + words[position]
            if len(candidate) > maxCaptionSize:
                break
            line = candidate
            position += 1
            if len(line) >= half_size and position < total:
                break
        result.append(line)
    return result
def getCaptionsWithTime(transcriptions, maxCaptionSize=15, considerPunctuation=True):
    """Group word-level transcription timings into timed captions.

    Args:
        transcriptions: Dict with a ``'segments'`` list; only segments that
            contain a ``'words'`` list are used. Each word needs ``'text'``,
            ``'start'`` and ``'end'``.
        maxCaptionSize: Maximum caption length in characters.
        considerPunctuation: When true, a word ending in ``.``, ``,``, ``!``
            or ``?`` closes the caption built so far and starts the next one.

    Returns:
        A list of ``((start_time, end_time), caption_text)`` tuples.
    """
    all_words = [
        word
        for segment in transcriptions['segments'] if 'words' in segment
        for word in segment['words']
    ]
    last_index = len(all_words) - 1

    time_splits = []
    current_caption = []
    current_length = 0
    for i, word in enumerate(all_words):
        word_text = word['text']
        is_last = i == last_index
        # Length of the caption if this word were appended (plus a space).
        new_length = current_length + len(word_text) + (1 if current_caption else 0)
        ends_with_punctuation = word_text.rstrip('.,!?') != word_text

        should_split = (
            new_length > maxCaptionSize or
            (considerPunctuation and ends_with_punctuation and current_caption) or
            is_last or
            len(current_caption) >= 5
        )
        if not should_split:
            current_caption.append(word_text)
            current_length = new_length
            continue

        # Track explicitly whether the current word went into the caption
        # being closed. Testing ``word_text in current_caption`` misfires as
        # soon as a word is repeated.
        merged_last_word = False
        if current_caption:
            # The caption covers the words just before index i; compute its
            # first index before the last word is (possibly) appended.
            first_index = i - len(current_caption)
            if is_last and new_length <= maxCaptionSize:
                current_caption.append(word_text)
                merged_last_word = True
            start_time = all_words[first_index]['start']
            end_time = word['end'] if merged_last_word else all_words[i - 1]['end']
            time_splits.append(((start_time, end_time), ' '.join(current_caption)))

        if is_last:
            # A final word that did not fit gets a caption of its own.
            if not merged_last_word:
                time_splits.append(((word['start'], word['end']), word_text))
            current_caption = []
            current_length = 0
        else:
            # The word that triggered the split opens the next caption.
            current_caption = [word_text]
            current_length = len(word_text)
    return time_splits
================================================
FILE: shortGPT/editing_utils/editing_images.py
================================================
from shortGPT.api_utils.image_api import getBingImages
from tqdm import tqdm
import random
import math
def getImageUrlsTimed(imageTextPairs):
    """Turn (timing, search query) pairs into (timing, image URL) pairs.

    Each query is resolved with searchImageUrlsFromQuery, so the URL slot holds
    whatever that function returns for the query (None when nothing was found).
    """
    timed_urls = []
    for pair in tqdm(imageTextPairs, desc='Search engine queries for images...'):
        timing, query = pair[0], pair[1]
        timed_urls.append((timing, searchImageUrlsFromQuery(query)))
    return timed_urls
def searchImageUrlsFromQuery(query, top=3, expected_dim=(720, 720), retries=5):
    """Return the URL of one randomly chosen image among the first `top` results.

    Args:
        query: Search text forwarded to getBingImages.
        top: How many leading results are considered.
        expected_dim: (width, height) used to compute each candidate's distance
            from the desired size.
        retries: Forwarded to getBingImages.

    Returns:
        The 'url' of the selected result, or None when the search returned
        nothing (or `top` leaves no candidates).
    """
    images = getBingImages(query, retries=retries)
    if not images:
        return None
    candidates = images[:top]
    distances = [math.dist((img['width'], img['height']), expected_dim) for img in candidates]
    if not distances:
        return None
    # The original sorted the distances, shuffled them and returned on the first
    # loop iteration, i.e. it picked one distance uniformly at random. Candidates
    # that share a distance resolve to the first of them, as before.
    picked = random.choice(distances)
    return candidates[distances.index(picked)]['url']
================================================
FILE: shortGPT/editing_utils/handle_videos.py
================================================
import os
import random
import yt_dlp
import subprocess
import json
def getYoutubeVideoLink(url):
    """Resolve a video page URL into a direct stream URL and its duration.

    URLs containing 'shorts' allow formats up to 1920 px high, all others up to
    1080 px. Nothing is downloaded; only metadata is extracted through yt_dlp.

    Returns:
        A (stream_url, duration) tuple taken from the extracted metadata.

    Raises:
        Exception: if extraction fails or the metadata lacks the expected keys.
            The original error is chained as the cause.
    """
    format_filter = "[height<=1920]" if 'shorts' in url else "[height<=1080]"
    ydl_opts = {
        "quiet": True,
        "no_warnings": True,
        "no_color": True,
        "no_call_home": True,
        "no_check_certificate": True,
        # Look for m3u8 formats first, then fall back to regular formats
        "format": f"bestvideo[ext=m3u8]{format_filter}/bestvideo{format_filter}"
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            dictMeta = ydl.extract_info(
                url,
                download=False)
        return dictMeta['url'], dictMeta['duration']
    except Exception as e:
        # Exceptions raised without arguments have an empty `args`; indexing it
        # blindly would replace the real failure with an IndexError.
        detail = e.args[0] if e.args else e
        raise Exception(f"Failed getting video link from the following video/url {url} {detail}") from e
def extract_random_clip_from_video(video_url, video_duration, clip_duration, output_file):
    """Extract a randomly positioned clip from a video with ffmpeg.

    The clip starts after the first 15% of the video, at a random offset inside
    the following 70% of its length.

    Args:
        video_url (str): The signed URL of the video, passed to ffmpeg as input.
        video_duration (float): Duration of the source video in seconds.
        clip_duration (float): The duration of the clip in seconds.
        output_file (str): The output file path for the extracted clip.

    Returns:
        str: `output_file`, once ffmpeg has written it.

    Raises:
        Exception: if the duration is missing, the video is too short
            (70% of it must exceed 120 seconds), or no output file was produced.
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
    """
    if not video_duration:
        raise Exception("Could not get video duration")
    if not video_duration*0.7 > 120:
        raise Exception("Video too short")
    # When the clip is longer than the 70% window the offset range would become
    # negative and could push the start before the 15% mark (or before 0).
    start_window = max(0.0, 0.7 * video_duration - clip_duration)
    start_time = video_duration * 0.15 + random.random() * start_window

    command = [
        'ffmpeg',
        '-loglevel', 'error',
        '-ss', str(start_time),
        '-t', str(clip_duration),
        '-i', video_url,
        '-c:v', 'libx264',
        '-preset', 'ultrafast',
        output_file
    ]
    subprocess.run(command, check=True)

    if not os.path.exists(output_file):
        raise Exception("Random clip failed to be written")
    return output_file
def get_aspect_ratio(video_file):
    """Return the display aspect ratio (width / height) of a video, via ffprobe.

    The first stream whose codec_type is 'video' is used. Its
    'display_aspect_ratio' is preferred; when that field is missing or reported
    as "0:1", the ratio is computed from the stream's width and height.

    Raises:
        subprocess.CalledProcessError: if ffprobe exits with a non-zero status.
        IndexError: if the file has no video stream.
    """
    # An argument list (no shell) keeps file names containing quotes, spaces or
    # shell metacharacters from breaking or altering the command.
    cmd = [
        'ffprobe', '-i', str(video_file),
        '-v', 'quiet',
        '-print_format', 'json',
        '-show_format', '-show_streams',
    ]
    jsonstr = subprocess.check_output(cmd, encoding='utf-8')
    r = json.loads(jsonstr)
    # look for "codec_type": "video". take the 1st one if there are multiple
    video_stream_info = [x for x in r['streams'] if x['codec_type'] == 'video'][0]

    dar_text = video_stream_info.get('display_aspect_ratio')
    if dar_text and dar_text != "0:1":
        a, b = dar_text.split(':')
        return int(a) / int(b)

    # some videos do not have the info of 'display_aspect_ratio'
    w, h = video_stream_info['width'], video_stream_info['height']
    return int(w) / int(h)
================================================
FILE: shortGPT/engine/README.md
================================================
# **Module: engine**
This module contains the main engine classes for generating different types of short videos. There are five main engine classes in this module:
- `AbstractContentEngine`: This is an abstract base class that provides the basic functionalities and attributes required by all content engines. It implements common methods for initializing the content engine, preparing editing paths, verifying parameters, and rendering the short video.
- `ContentShortEngine`: This class extends `AbstractContentEngine` and is used for generating general content short videos. It implements specific methods for generating temporary audio, speeding up the audio, timing captions, generating image search terms, generating image URLs, choosing background music and video, and preparing background and custom assets. It declares the abstract `_generateScript` method, which child classes override to generate the script for the content short video.
- `ContentVideoEngine`: This class extends `AbstractContentEngine` and is used for generating general content videos. It implements specific methods for generating temporary audio, speeding up the audio, timing captions, generating video search terms, generating video URLs, choosing background music, and preparing background and custom assets.
- `FactsShortEngine`: This class extends `ContentShortEngine` and is used for generating facts short videos. It overrides the `_generateScript` method to generate the script for the facts short video.
- `RedditShortEngine`: This class extends `ContentShortEngine` and is used for generating reddit short videos. It overrides the `_generateScript` method to generate the script for the reddit short video and adds a custom step for preparing a reddit image.
---
## **File: abstract_content_engine.py**
This file contains the `AbstractContentEngine` class, which is an abstract base class for all content engines. It provides the basic functionalities and attributes required by all content engines.
### **Class: AbstractContentEngine**
#### **Attributes:**
- `CONTENT_DB`: An instance of the `ContentDatabase` class, which is used to store and retrieve content data.
#### **Methods:**
- `__init__(self, short_id: str, content_type: str, language: Language, voiceModule: VoiceModule)`: Initializes an instance of the `AbstractContentEngine` class with the given parameters. It checks that FFmpeg is available, prepares the editing paths, and sets the `dataManager`, `id`, `_db_language`, `voiceModule`, `stepDict`, `default_logger`, and `logger` attributes.
- `__getattr__(self, name)`: Overrides the `__getattr__` method to retrieve attributes that start with '_db_' from the `dataManager`.
- `__setattr__(self, name, value)`: Overrides the `__setattr__` method to save attributes that start with '_db_' to the `dataManager`.
- `prepareEditingPaths(self)`: Creates the directory for storing dynamic assets if it doesn't already exist.
- `verifyParameters(*args, **kwargs)`: Verifies that all the required parameters are not null. If any parameter is null, it raises an exception.
- `isShortDone(self)`: Checks if the short video is done rendering by checking the value of the '_db_ready_to_upload' attribute.
- `makeContent(self)`: Generates the short video by executing the steps defined in the `stepDict`. It yields the current step number and a message indicating the progress.
- `get_video_output_path(self)`: Returns the path of the rendered video.
- `get_total_steps(self)`: Returns the total number of steps in the `stepDict`.
- `set_logger(self, logger)`: Sets the logger function for logging the progress of the short video rendering.
- `initializeFFMPEG(self)`: Initializes the paths for FFmpeg, FFProbe. If any of these programs are not found, it raises an exception.
---
## **File: content_short_engine.py**
This file contains the `ContentShortEngine` class, which is used for generating general content short videos. It extends the `AbstractContentEngine` class and adds specific methods for generating a script, generating temporary audio, speeding up the audio, timing captions, generating image search terms, generating image URLs, choosing background music and video, and preparing background and custom assets.
### **Class: ContentShortEngine**
#### **Attributes:**
- `stepDict`: A dictionary that maps step numbers to their corresponding methods for generating the short video.
#### **Methods:**
- `__init__(self, short_type: str, background_video_name: str, background_music_name: str, voiceModule: VoiceModule, short_id="", num_images=None, watermark=None, language: Language = Language.ENGLISH)`: Initializes an instance of the `ContentShortEngine` class with the given parameters. It sets the `stepDict` attribute with the specific methods for generating the short video.
- `_generateScript(self)`: Abstract method that generates the script for the content short video. This method needs to be implemented by the child classes.
- `_prepareCustomAssets(self)`: Hook that prepares the custom assets for the content short video. The base implementation only logs its progress; child classes can override it.
- `_editAndRenderShort(self)`: Performs the editing and rendering of the content short video by using the `EditingEngine` with the voiceover, background music, background clip, captions, and images.
---
## **File: content_video_engine.py**
This file contains the `ContentVideoEngine` class, which is used for generating general content videos. It extends the `AbstractContentEngine` class and adds specific methods for generating temporary audio, speeding up the audio, timing captions, generating video search terms, generating video URLs, choosing background music, and preparing background and custom assets.
### **Class: ContentVideoEngine**
#### **Methods:**
- `_generateTempAudio(self)`: Generates the temporary audio for the content video by using the `voiceModule` to generate a voice from the script (translated first when the language is not English).
- `_speedUpAudio(self)`: Reuses the temporary audio as the final audio without speeding it up, since these videos are not limited to the duration of a short.
- `_timeCaptions(self)`: Converts the audio to text and then generates captions with time based on the text.
- `_generateVideoSearchTerms(self)`: Generates the video search terms by using the timed captions.
- `_generateVideoUrls(self)`: Generates the video URLs by using the video search terms and the `getBestVideo` function from the `pexels_api`.
- `_chooseBackgroundMusic(self)`: Retrieves the background music URL from the `AssetDatabase` based on the background music name, when one was provided.
- `_prepareBackgroundAssets(self)`: Prepares the background assets for the content video by retrieving the voiceover audio duration.
- `_prepareCustomAssets(self)`: Hook that prepares the custom assets for the content video. The base implementation only logs its progress; child classes can override it.
- `_editAndRenderShort(self)`: Performs the editing and rendering of the content video by using the `EditingEngine`.
---
## **File: facts_short_engine.py**
This file contains the `FactsShortEngine` class, which is used for generating facts short videos. It extends the `ContentShortEngine` class and overrides the `_generateScript` method to generate the script for the facts short video.
### **Class: FactsShortEngine**
#### **Methods:**
- `_generateScript(self)`: Generates the script for the facts short video by using the `generateFacts` function from the `facts_gpt` module.
---
## **File: reddit_short_engine.py**
This file contains the `RedditShortEngine` class, which is used for generating reddit short videos. It extends the `ContentShortEngine` class and overrides the `_generateScript` method to generate the script for the reddit short video. It also adds a custom step for preparing a reddit image.
### **Class: RedditShortEngine**
#### **Methods:**
- `_generateScript(self)`: Generates the script for the reddit short video by using the `getInterestingRedditQuestion` function from the `reddit_gpt` module.
- `_prepareCustomAssets(self)`: Prepares the custom assets for the reddit short video by using the `ingestFlow` method from the `imageEditingEngine` to create a reddit image.
- `_editAndRenderShort(self)`: Performs the editing and rendering of the reddit short video by using the `videoEditor` and the editing steps defined in the `stepDict`.
================================================
FILE: shortGPT/engine/__init__.py
================================================
from . import abstract_content_engine
from . import reddit_short_engine
================================================
FILE: shortGPT/engine/abstract_content_engine.py
================================================
import os
from abc import ABC
from shortGPT.audio.voice_module import VoiceModule
from shortGPT.config.languages import Language
from shortGPT.config.path_utils import get_program_path
from shortGPT.database.content_database import ContentDatabase
# Module-level content database handle, created once at import time; engines obtain
# their per-content data manager from it in AbstractContentEngine.__init__.
CONTENT_DB = ContentDatabase()
class AbstractContentEngine(ABC):
    """Base class for content engines that run a numbered sequence of steps.

    Attributes named ``_db_<key>`` are proxied to ``self.dataManager``: reading
    one fetches ``<key>`` from the data manager and caches it on the instance,
    assigning one updates the cache and saves the value through the data manager.
    Subclasses fill ``self.stepDict`` with ``{step_number: bound_method}``.
    """

    def __init__(self, short_id: str, content_type: str, language: Language, voiceModule: VoiceModule):
        """Attach to an existing content entry (when `short_id` is given) or create one.

        Also checks that ffmpeg/ffprobe are available, creates the working
        directory and stores the language value through the data manager.
        """
        if short_id:
            self.dataManager = CONTENT_DB.getContentDataManager(
                short_id, content_type
            )
        else:
            self.dataManager = CONTENT_DB.createContentDataManager(content_type)
        self.id = str(self.dataManager._getId())
        self.initializeFFMPEG()
        self.prepareEditingPaths()
        self._db_language = language.value
        self.voiceModule = voiceModule
        self.stepDict = {}
        # No-op logger; also used as a sentinel meaning "no logger was set".
        self.default_logger = lambda _: None
        self.logger = self.default_logger

    def __getattr__(self, name):
        """Resolve ``_db_*`` attributes from the data manager, caching the value.

        Only called when normal attribute lookup fails.
        """
        if name.startswith('_db_'):
            db_path = name[4:]  # remove '_db_' prefix
            cache_attr = '_' + name
            if not hasattr(self, cache_attr):
                setattr(self, cache_attr, self.dataManager.get(db_path))
            return getattr(self, cache_attr)
        # Neither ABC nor object defines __getattr__, so delegating to super()
        # failed with a misleading "'super' object has no attribute '__getattr__'".
        raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")

    def __setattr__(self, name, value):
        """Persist ``_db_*`` attributes through the data manager; set others normally."""
        if name.startswith('_db_'):
            db_path = name[4:]  # remove '_db_' prefix
            cache_attr = '_' + name
            setattr(self, cache_attr, value)
            self.dataManager.save(db_path, value)
        else:
            super().__setattr__(name, value)

    def prepareEditingPaths(self):
        """Create this content's working directory and remember it in `dynamicAssetDir`."""
        self.dynamicAssetDir = f".editing_assets/{self.dataManager.contentType}_assets/{self.id}/"
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(self.dynamicAssetDir, exist_ok=True)

    def verifyParameters(*args, **kargs):
        """Raise if any keyword argument is falsy.

        Declared without an explicit `self`; when called on an instance, the
        instance arrives in `args` and is ignored.
        """
        for key, value in kargs.items():
            if not value:
                print(kargs)
                raise Exception(f"Parameter :{key} is null")

    def isShortDone(self):
        """Return the stored `ready_to_upload` flag."""
        return self._db_ready_to_upload

    def makeContent(self):
        """Run the remaining steps in order, yielding progress before each one.

        Yields:
            (step_number, message) tuples. Because this is a generator, a step
            only executes once the consumer asks for the next item.

        Raises:
            Exception: if the next step number is not present in `stepDict`.
        """
        while (not self.isShortDone()):
            currentStep = self._db_last_completed_step + 1
            if currentStep not in self.stepDict:
                raise Exception(f'Incorrect step {currentStep}')
            step = self.stepDict[currentStep]
            if step.__name__ == "_editAndRenderShort":
                yield currentStep, f'Current step ({currentStep} / {self.get_total_steps()}) : ' + "Preparing rendering assets..."
            else:
                yield currentStep, f'Current step ({currentStep} / {self.get_total_steps()}) : ' + step.__name__
            if self.logger is not self.default_logger:
                print(f'Step {currentStep} {step.__name__}')
            step()
            self._db_last_completed_step = currentStep

    def get_video_output_path(self):
        """Return the stored path of the produced video."""
        return self._db_video_path

    def get_total_steps(self):
        """Return the number of steps registered in `stepDict`."""
        return len(self.stepDict)

    def set_logger(self, logger):
        """Set the callable that receives progress messages."""
        self.logger = logger

    def initializeFFMPEG(self):
        """Raise if the ffmpeg or ffprobe program cannot be located."""
        ffmpeg_path = get_program_path("ffmpeg")
        if not ffmpeg_path:
            raise Exception("FFmpeg, a program used for automated editing within ShortGPT was not found on your computer. Please go back to the README and follow the instructions to install FFMPEG")
        ffprobe_path = get_program_path("ffprobe")
        if not ffprobe_path:
            raise Exception("FFProbe, a dependecy of FFmpeg was not found. Please go back to the README and follow the instructions to install FFMPEG")
================================================
FILE: shortGPT/engine/content_short_engine.py
================================================
import datetime
import os
import re
import shutil
from abc import abstractmethod
from shortGPT.audio import audio_utils
from shortGPT.audio.audio_duration import get_asset_duration
from shortGPT.audio.voice_module import VoiceModule
from shortGPT.config.asset_db import AssetDatabase
from shortGPT.config.languages import Language
from shortGPT.editing_framework.editing_engine import (EditingEngine,
EditingStep)
from shortGPT.editing_utils import captions, editing_images
from shortGPT.editing_utils.handle_videos import extract_random_clip_from_video
from shortGPT.engine.abstract_content_engine import AbstractContentEngine
from shortGPT.gpt import gpt_editing, gpt_translate, gpt_yt
class ContentShortEngine(AbstractContentEngine):
    """Engine that produces a short video in twelve persisted steps.

    The steps cover script, voiceover, captions, optional images, background
    music/video, rendering and YouTube metadata. Subclasses must implement
    `_generateScript` and may override `_prepareCustomAssets`.
    """

    def __init__(self, short_type: str, background_video_name: str, background_music_name: str, voiceModule: VoiceModule, short_id="",
                 num_images=None, watermark=None, language: Language = Language.ENGLISH,):
        """Create a new short, or resume an existing one when `short_id` is given.

        The creation-time options are only stored for new content; a resumed
        short keeps the values already saved for it.
        """
        super().__init__(short_id, short_type, language, voiceModule)
        if not short_id:
            if (num_images):
                self._db_num_images = num_images
            if (watermark):
                self._db_watermark = watermark
            self._db_background_video_name = background_video_name
            self._db_background_music_name = background_music_name

        self.stepDict = {
            1: self._generateScript,
            2: self._generateTempAudio,
            3: self._speedUpAudio,
            4: self._timeCaptions,
            5: self._generateImageSearchTerms,
            6: self._generateImageUrls,
            7: self._chooseBackgroundMusic,
            8: self._chooseBackgroundVideo,
            9: self._prepareBackgroundAssets,
            10: self._prepareCustomAssets,
            11: self._editAndRenderShort,
            12: self._addYoutubeMetadata
        }

    @abstractmethod
    def _generateScript(self):
        """Produce the script and store it in `self._db_script` (implemented by subclasses)."""
        self._db_script = ""

    def _generateTempAudio(self):
        """Generate the voiceover from the script, translating it first when the language is not English."""
        if not self._db_script:
            raise NotImplementedError("generateScript method must set self._db_script.")
        if (self._db_temp_audio_path):
            return
        self.verifyParameters(text=self._db_script)
        script = self._db_script
        if (self._db_language != Language.ENGLISH.value):
            self._db_translated_script = gpt_translate.translateContent(script, self._db_language)
            script = self._db_translated_script
        self._db_temp_audio_path = self.voiceModule.generate_voice(
            script, self.dynamicAssetDir + "temp_audio_path.wav")

    def _speedUpAudio(self):
        """Store the sped-up version of the temporary voiceover as the final audio."""
        if (self._db_audio_path):
            return
        self.verifyParameters(tempAudioPath=self._db_temp_audio_path)
        self._db_audio_path = audio_utils.speedUpAudio(
            self._db_temp_audio_path, self.dynamicAssetDir+"audio_voice.wav")

    def _timeCaptions(self):
        """Transcribe the final audio and store the timed captions."""
        self.verifyParameters(audioPath=self._db_audio_path)
        whisper_analysis = audio_utils.audioToText(self._db_audio_path)
        self._db_timed_captions = captions.getCaptionsWithTime(
            whisper_analysis)

    def _generateImageSearchTerms(self):
        """Derive timed image search queries from the captions when images were requested."""
        self.verifyParameters(captionsTimed=self._db_timed_captions)
        if self._db_num_images:
            self._db_timed_image_searches = gpt_editing.getImageQueryPairs(
                self._db_timed_captions, n=self._db_num_images)

    def _generateImageUrls(self):
        """Resolve the timed image searches into timed image URLs, if any searches exist."""
        if self._db_timed_image_searches:
            self._db_timed_image_urls = editing_images.getImageUrlsTimed(
                self._db_timed_image_searches)

    def _chooseBackgroundMusic(self):
        """Look up the link of the configured background music asset."""
        self._db_background_music_url = AssetDatabase.get_asset_link(self._db_background_music_name)

    def _chooseBackgroundVideo(self):
        """Look up the link and duration of the configured background video asset."""
        self._db_background_video_url = AssetDatabase.get_asset_link(
            self._db_background_video_name)
        self._db_background_video_duration = AssetDatabase.get_asset_duration(
            self._db_background_video_name)

    def _prepareBackgroundAssets(self):
        """Measure the voiceover and cut a random background clip of the same duration."""
        self.verifyParameters(
            voiceover_audio_url=self._db_audio_path,
            video_duration=self._db_background_video_duration,
            background_video_url=self._db_background_video_url, music_url=self._db_background_music_url)
        if not self._db_voiceover_duration:
            self.logger("Rendering short: (1/4) preparing voice asset...")
            self._db_audio_path, self._db_voiceover_duration = get_asset_duration(
                self._db_audio_path, isVideo=False)
        if not self._db_background_trimmed:
            self.logger("Rendering short: (2/4) preparing background video asset...")
            self._db_background_trimmed = extract_random_clip_from_video(
                self._db_background_video_url, self._db_background_video_duration, self._db_voiceover_duration, self.dynamicAssetDir + "clipped_background.mp4")

    def _prepareCustomAssets(self):
        """Hook for subclass-specific assets; the base implementation only logs."""
        self.logger("Rendering short: (3/4) preparing custom assets...")

    def _editAndRenderShort(self):
        """Assemble the editing schema and render the short, unless it is already rendered."""
        self.verifyParameters(
            voiceover_audio_url=self._db_audio_path,
            video_duration=self._db_background_video_duration,
            music_url=self._db_background_music_url)

        outputPath = self.dynamicAssetDir+"rendered_video.mp4"
        if not (os.path.exists(outputPath)):
            self.logger("Rendering short: Starting automated editing...")
            videoEditor = EditingEngine()
            videoEditor.addEditingStep(EditingStep.ADD_VOICEOVER_AUDIO, {
                'url': self._db_audio_path})
            videoEditor.addEditingStep(EditingStep.ADD_BACKGROUND_MUSIC, {'url': self._db_background_music_url,
                                                                          'loop_background_music': self._db_voiceover_duration,
                                                                          "volume_percentage": 0.11})
            videoEditor.addEditingStep(EditingStep.CROP_1920x1080, {
                'url': self._db_background_trimmed})
            videoEditor.addEditingStep(EditingStep.ADD_SUBSCRIBE_ANIMATION, {'url': AssetDatabase.get_asset_link('subscribe animation')})

            if self._db_watermark:
                videoEditor.addEditingStep(EditingStep.ADD_WATERMARK, {
                    'text': self._db_watermark})

            caption_type = EditingStep.ADD_CAPTION_SHORT_ARABIC if self._db_language == Language.ARABIC.value else EditingStep.ADD_CAPTION_SHORT
            for timing, text in self._db_timed_captions:
                videoEditor.addEditingStep(caption_type, {'text': text.upper(),
                                                          'set_time_start': timing[0],
                                                          'set_time_end': timing[1]})
            if self._db_num_images:
                # `_db_timed_image_urls` is only written when image searches exist,
                # and an individual search can resolve to no URL; skip both cases
                # instead of iterating None or queueing an image step without a URL.
                for timing, image_url in self._db_timed_image_urls or []:
                    if not image_url:
                        continue
                    videoEditor.addEditingStep(EditingStep.SHOW_IMAGE, {'url': image_url,
                                                                        'set_time_start': timing[0],
                                                                        'set_time_end': timing[1]})

            print("***** SCHEMA FOR RENDERING ****")
            print(videoEditor.dumpEditingSchema())
            print("***** SCHEMA FOR RENDERING ****")
            videoEditor.renderVideo(outputPath, logger= self.logger if self.logger is not self.default_logger else None)

        self._db_video_path = outputPath

    def _addYoutubeMetadata(self):
        """Generate title/description, move the video into `videos/` and mark the short as done."""
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs('videos', exist_ok=True)
        self._db_yt_title, self._db_yt_description = gpt_yt.generate_title_description_dict(self._db_script)

        now = datetime.datetime.now()
        date_str = now.strftime("%Y-%m-%d_%H-%M-%S")
        # Keep only characters allowed by the pattern below in the file name.
        newFileName = f"videos/{date_str} - " + \
            re.sub(r"[^a-zA-Z0-9 '\n\.]", '', self._db_yt_title)

        shutil.move(self._db_video_path, newFileName+".mp4")
        with open(newFileName+".txt", "w", encoding="utf-8") as f:
            f.write(
                f"---Youtube title---\n{self._db_yt_title}\n---Youtube description---\n{self._db_yt_description}")
        self._db_video_path = newFileName+".mp4"
        self._db_ready_to_upload = True
================================================
FILE: shortGPT/engine/content_translation_engine.py
================================================
import datetime
import os
import re
import shutil
from tqdm import tqdm
from shortGPT.audio.audio_duration import get_asset_duration
from shortGPT.audio.audio_utils import (audioToText, get_asset_duration,
run_background_audio_split,
speedUpAudio)
from shortGPT.audio.voice_module import VoiceModule
from shortGPT.config.languages import ACRONYM_LANGUAGE_MAPPING, Language
from shortGPT.editing_framework.editing_engine import (EditingEngine,
EditingStep)
from shortGPT.editing_utils.captions import (getCaptionsWithTime,
getSpeechBlocks)
from shortGPT.editing_utils.handle_videos import get_aspect_ratio
from shortGPT.engine.abstract_content_engine import AbstractContentEngine
from shortGPT.gpt.gpt_translate import translateContent
class ContentTranslationEngine(AbstractContentEngine):
    """Engine that dubs a source video into a target language in five persisted steps.

    The steps are: transcribe, translate, generate translated audio, edit and
    render, then move the result into `videos/`.
    """

    def __init__(self, voiceModule: VoiceModule, src_url: str = "", target_language: Language = Language.ENGLISH, use_captions=False, id=""):
        """Create a new translation job, or resume an existing one when `id` is given.

        The creation-time options are only stored for new content.
        """
        super().__init__(id, "content_translation", target_language, voiceModule)
        if not id:
            self._db_should_translate = True
            if src_url:
                self._db_src_url = src_url
            self._db_use_captions = use_captions
            self._db_target_language = target_language.value

        self.stepDict = {
            1: self._transcribe_audio,
            2: self._translate_content,
            3: self._generate_translated_audio,
            4: self._edit_and_render_video,
            5: self._add_metadata
        }

    def _transcribe_audio(self):
        """Transcribe the source audio into timed speech blocks.

        When the detected language already equals the target language, the speech
        blocks are reused as the translated sentences and translation is disabled.

        Raises:
            Exception: if the voice module reports fewer remaining characters
                than the transcribed text contains.
        """
        video_audio, _ = get_asset_duration(self._db_src_url, isVideo=False)
        self.verifyParameters(content_path=video_audio)
        self.logger(f"1/5 - Transcribing original audio to text...")
        whispered = audioToText(video_audio, model_size='base')
        self._db_speech_blocks = getSpeechBlocks(whispered, silence_time=0.8)
        if (ACRONYM_LANGUAGE_MAPPING.get(whispered['language']) == Language(self._db_target_language)):
            self._db_translated_timed_sentences = self._db_speech_blocks
            self._db_should_translate = False

        expected_chars = len("".join([text for _, text in self._db_speech_blocks]))
        chars_remaining = self.voiceModule.get_remaining_characters()
        if chars_remaining < expected_chars:
            raise Exception(
                f"Your VoiceModule's key doesn't have enough characters to totally translate this video | Remaining: {chars_remaining} | Number of characters to translate: {expected_chars}")

    def _translate_content(self):
        """Translate every speech block into the target language, keeping its timing."""
        if (self._db_should_translate):
            self.verifyParameters(_db_speech_blocks=self._db_speech_blocks)

            translated_timed_sentences = []
            for i, ((t1, t2), text) in tqdm(enumerate(self._db_speech_blocks), desc="Translating content"):
                self.logger(f"2/5 - Translating text content - {i+1} / {len(self._db_speech_blocks)}")
                translated_text = translateContent(text, self._db_target_language)
                translated_timed_sentences.append([[t1, t2], translated_text])
            self._db_translated_timed_sentences = translated_timed_sentences

    def _generate_translated_audio(self):
        """Synthesize each translated sentence and adjust it to the original time slot.

        Stores a list of [[start, start + audio_duration], audio_path] entries.
        """
        self.verifyParameters(translated_timed_sentences=self._db_translated_timed_sentences)

        translated_audio_blocks = []
        for i, ((t1, t2), translated_text) in tqdm(enumerate(self._db_translated_timed_sentences), desc="Generating translated audio"):
            self.logger(f"3/5 - Generating translated audio - {i+1} / {len(self._db_translated_timed_sentences)}")
            translated_voice = self.voiceModule.generate_voice(translated_text, self.dynamicAssetDir+f"translated_{i}_{self._db_target_language}.wav")
            if not translated_voice:
                raise Exception('An error happending during audio voice creation')
            final_audio_path = speedUpAudio(translated_voice, self.dynamicAssetDir+f"translated_{i}_{self._db_target_language}_spedup.wav", expected_duration=t2-t1 - 0.05)
            _, translated_duration = get_asset_duration(final_audio_path, isVideo=False)
            translated_audio_blocks.append([[t1, t1+translated_duration], final_audio_path])
        self._db_audio_bits = translated_audio_blocks

    def _edit_and_render_video(self):
        """Lay the translated audio over the source video, optionally add captions, and render."""
        self.verifyParameters(_db_audio_bits=self._db_audio_bits)
        self.logger(f"4.1 / 5 - Preparing automated editing")
        target_language = Language(self._db_target_language)
        input_video, video_length = get_asset_duration(self._db_src_url)
        video_audio, _ = get_asset_duration(self._db_src_url, isVideo=False)
        editing_engine = EditingEngine()
        editing_engine.addEditingStep(EditingStep.ADD_BACKGROUND_VIDEO, {'url': input_video, "set_time_start": 0, "set_time_end": video_length})
        last_t2 = 0
        for (t1, t2), audio_path in self._db_audio_bits:
            t2+=-0.05
            editing_engine.addEditingStep(EditingStep.INSERT_AUDIO, {'url': audio_path, 'set_time_start': t1, 'set_time_end': t2})
            # Gaps longer than 4 seconds between translated blocks keep the original audio.
            if t1-last_t2 >4:
                editing_engine.addEditingStep(EditingStep.EXTRACT_AUDIO, {"url": video_audio, "subclip": {"start_time": last_t2, "end_time": t1}, "set_time_start": last_t2, "set_time_end": t1})
            last_t2 = t2

        # Same rule for the tail of the video after the last translated block.
        if video_length - last_t2 >4:
            editing_engine.addEditingStep(EditingStep.EXTRACT_AUDIO, {"url": video_audio, "subclip": {"start_time": last_t2, "end_time": video_length}, "set_time_start": last_t2, "set_time_end": video_length})

        if self._db_use_captions:
            is_landscape = get_aspect_ratio(input_video) > 1
            if not self._db_timed_translated_captions:
                if not self._db_translated_voiceover_path:
                    self.logger(f"4.5 / 5 - Generating captions in {target_language.value}")
                    editing_engine.generateAudio(self.dynamicAssetDir+"translated_voiceover.wav")
                    self._db_translated_voiceover_path = self.dynamicAssetDir+"translated_voiceover.wav"
                whispered_translated = audioToText(self._db_translated_voiceover_path, model_size='base')
                timed_translated_captions = getCaptionsWithTime(whispered_translated, maxCaptionSize=50 if is_landscape else 15, considerPunctuation=True)
                # Captions spanning more than 4 seconds are dropped.
                self._db_timed_translated_captions = [[[t1,t2], text] for (t1, t2), text in timed_translated_captions if t2 - t1 <= 4]
            for (t1, t2), text in self._db_timed_translated_captions:
                caption_key = "LANDSCAPE" if is_landscape else "SHORT"
                caption_key += "_ARABIC" if target_language == Language.ARABIC else ""
                caption_type = getattr(EditingStep, f"ADD_CAPTION_{caption_key}")
                editing_engine.addEditingStep(caption_type, {'text': text, "set_time_start": t1, "set_time_end": t2})

        self._db_video_path = self.dynamicAssetDir+"translated_content.mp4"

        editing_engine.renderVideo(self._db_video_path, logger= self.logger if self.logger is not self.default_logger else None)

    def _add_metadata(self):
        """Move the rendered video into `videos/` under a timestamped name and mark the job done."""
        self.logger(f"5 / 5 - Saving translated video")
        # The move below fails when the destination folder does not exist yet.
        os.makedirs('videos', exist_ok=True)
        now = datetime.datetime.now()
        date_str = now.strftime("%Y-%m-%d_%H-%M-%S")
        newFileName = f"videos/{date_str} - " + \
            re.sub(r"[^a-zA-Z0-9 '\n\.]", '', f"translated_content_to_{self._db_target_language}")
        shutil.move(self._db_video_path, newFileName+".mp4")
        self._db_video_path = newFileName+".mp4"
        self._db_ready_to_upload = True
================================================
FILE: shortGPT/engine/content_video_engine.py
================================================
import datetime
import os
import re
import shutil
from shortGPT.api_utils.pexels_api import getBestVideo
from shortGPT.audio import audio_utils
from shortGPT.audio.audio_duration import get_asset_duration
from shortGPT.audio.voice_module import VoiceModule
from shortGPT.config.asset_db import AssetDatabase
from shortGPT.config.languages import Language
from shortGPT.editing_framework.editing_engine import (EditingEngine,
EditingStep)
from shortGPT.editing_utils import captions
from shortGPT.engine.abstract_content_engine import AbstractContentEngine
from shortGPT.gpt import gpt_editing, gpt_translate, gpt_yt
class ContentVideoEngine(AbstractContentEngine):
def __init__(self, voiceModule: VoiceModule, script: str, background_music_name="", id="",
             watermark=None, isVerticalFormat=False, language: Language = Language.ENGLISH):
    """Create a new "general_video" content entry, or resume one when `id` is given.

    Script, format, watermark and music name are only stored for new content.
    The ten processing steps are registered in `stepDict`, numbered from 1.
    """
    super().__init__(id, "general_video", language, voiceModule)

    is_new_content = not id
    if is_new_content:
        if watermark:
            self._db_watermark = watermark
        if background_music_name:
            self._db_background_music_name = background_music_name
        self._db_script = script
        self._db_format_vertical = isVerticalFormat

    ordered_steps = (
        self._generateTempAudio,
        self._speedUpAudio,
        self._timeCaptions,
        self._generateVideoSearchTerms,
        self._generateVideoUrls,
        self._chooseBackgroundMusic,
        self._prepareBackgroundAssets,
        self._prepareCustomAssets,
        self._editAndRenderShort,
        self._addMetadata,
    )
    self.stepDict = dict(enumerate(ordered_steps, start=1))
def _generateTempAudio(self):
    """Synthesize the voiceover for the script unless one was already generated.

    For any language other than English the script is translated first and the
    translation is stored alongside it.
    """
    if not self._db_script:
        raise NotImplementedError("generateScript method must set self._db_script.")
    if self._db_temp_audio_path:
        return

    self.verifyParameters(text=self._db_script)
    narration = self._db_script
    needs_translation = self._db_language != Language.ENGLISH.value
    if needs_translation:
        self._db_translated_script = gpt_translate.translateContent(narration, self._db_language)
        narration = self._db_translated_script

    output_path = self.dynamicAssetDir + "temp_audio_path.wav"
    self._db_temp_audio_path = self.voiceModule.generate_voice(narration, output_path)
def _speedUpAudio(self):
    """Use the temporary voiceover as the final audio, without speeding it up.

    Does nothing when a final audio path is already stored.
    """
    if (self._db_audio_path):
        return
    self.verifyParameters(tempAudioPath=self._db_temp_audio_path)
    # Since the video is not supposed to be a short (less than 60sec), there is no reason to speed it up.
    # The unreachable speed-up call that followed the early return was removed.
    self._db_audio_path = self._db_temp_audio_path
def _timeCaptions(self):
    """Transcribe the final audio and store timed captions.

    Captions are limited to 15 characters for vertical videos and 30 otherwise.
    """
    self.verifyParameters(audioPath=self._db_audio_path)
    transcription = audio_utils.audioToText(self._db_audio_path)
    caption_size = 15 if self._db_format_vertical else 30
    self._db_timed_captions = captions.getCaptionsWithTime(
        transcription, maxCaptionSize=caption_size)
def _generateVideoSearchTerms(self):
    """Derive timed video search queries from the timed captions.

    The stored result is a list of pairs of timing (t1,t2) + 3 search video
    queries, such as: [[t1,t2], [search_query_1, search_query_2, search_query_3]]
    """
    timed_captions = self._db_timed_captions
    self.verifyParameters(captionsTimed=timed_captions)
    self._db_timed_video_searches = gpt_editing.getVideoSearchQueriesTimed(timed_captions)
def _generateVideoUrls(self):
    """Resolve one background video URL for every timed search segment.

    For each segment the search terms are tried in reverse order and the
    first truthy result of ``getBestVideo`` is kept. The part of each chosen
    URL before ``'.hd'`` is remembered and passed back as ``used_vids``.
    Segments without any hit keep the last (falsy) result.
    """
    segments = self._db_timed_video_searches
    self.verifyParameters(captionsTimed=segments)

    resolved = []
    already_used = []
    for (start, end), queries in segments:
        chosen = ""
        for term in reversed(queries):
            chosen = getBestVideo(
                term,
                orientation_landscape=not self._db_format_vertical,
                used_vids=already_used,
            )
            if not chosen:
                continue
            already_used.append(chosen.split('.hd')[0])
            break
        resolved.append([[start, end], chosen])

    self._db_timed_video_urls = resolved
def _chooseBackgroundMusic(self):
    """Look up the asset link of the configured background music, if one was named."""
    if not self._db_background_music_name:
        return
    self._db_background_music_url = AssetDatabase.get_asset_link(self._db_background_music_name)
def _prepareBackgroundAssets(self):
    """Measure the voice-over duration unless it is already known.

    ``get_asset_duration`` returns both a path and a duration; both are
    written back to the engine.
    """
    self.verifyParameters(voiceover_audio_url=self._db_audio_path)
    if self._db_voiceover_duration:
        return
    self.logger("Rendering short: (1/4) preparing voice asset...")
    audio_path, duration = get_asset_duration(self._db_audio_path, isVideo=False)
    self._db_audio_path = audio_path
    self._db_voiceover_duration = duration
def _prepareCustomAssets(self):
    """Log the custom-asset step; this implementation prepares nothing."""
    self.logger("Rendering short: (3/4) preparing custom assets...")
def _editAndRenderShort(self):
    """Assemble the editing steps and render the video, unless a render already exists.

    The render is skipped when ``rendered_video.mp4`` is already present in
    the dynamic asset directory; the video path is recorded either way.
    """
    self.verifyParameters(voiceover_audio_url=self._db_audio_path)
    render_target = self.dynamicAssetDir + "rendered_video.mp4"

    if not os.path.exists(render_target):
        self.logger("Rendering short: Starting automated editing...")
        editor = EditingEngine()

        editor.addEditingStep(EditingStep.ADD_VOICEOVER_AUDIO, {'url': self._db_audio_path})

        if self._db_background_music_url:
            music_args = {
                'url': self._db_background_music_url,
                'loop_background_music': self._db_voiceover_duration,
                "volume_percentage": 0.08,
            }
            editor.addEditingStep(EditingStep.ADD_BACKGROUND_MUSIC, music_args)

        for (start, end), clip_url in self._db_timed_video_urls:
            clip_args = {'url': clip_url, 'set_time_start': start, 'set_time_end': end}
            editor.addEditingStep(EditingStep.ADD_BACKGROUND_VIDEO, clip_args)

        # Pick the caption step from the video format and the language.
        is_arabic = self._db_language == Language.ARABIC.value
        if self._db_format_vertical:
            caption_step = EditingStep.ADD_CAPTION_SHORT_ARABIC if is_arabic else EditingStep.ADD_CAPTION_SHORT
        else:
            caption_step = EditingStep.ADD_CAPTION_LANDSCAPE_ARABIC if is_arabic else EditingStep.ADD_CAPTION_LANDSCAPE

        for (start, end), caption in self._db_timed_captions:
            caption_args = {'text': caption.upper(), 'set_time_start': start, 'set_time_end': end}
            editor.addEditingStep(caption_step, caption_args)

        # Only forward a logger to the renderer when it is not the default one.
        render_logger = None if self.logger is self.default_logger else self.logger
        editor.renderVideo(render_target, logger=render_logger)

    self._db_video_path = render_target
def _addMetadata(self):
    """Generate the YouTube title/description and move the render into ``videos/``.

    The video is renamed to ``videos/<timestamp> - <sanitized title>.mp4``
    and a ``.txt`` file with the title and description is written next to
    it. Finally the content is flagged as ready to upload.
    """
    if not os.path.exists('videos/'):
        os.makedirs('videos')

    self._db_yt_title, self._db_yt_description = gpt_yt.generate_title_description_dict(self._db_script)

    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    # Drop every character that is not alphanumeric, space, quote, newline or dot.
    safe_title = re.sub(r"[^a-zA-Z0-9 '\n\.]", '', self._db_yt_title)
    base_name = f"videos/{timestamp} - " + safe_title
    final_video = base_name + ".mp4"

    shutil.move(self._db_video_path, final_video)
    with open(base_name + ".txt", "w", encoding="utf-8") as notes:
        notes.write(
            f"---Youtube title---\n{self._db_yt_title}\n---Youtube description---\n{self._db_yt_description}")

    self._db_video_path = final_video
    self._db_ready_to_upload = True
================================================
FILE: shortGPT/engine/facts_short_engine.py
================================================
from shortGPT.audio.voice_module import VoiceModule
from shortGPT.gpt import facts_gpt
from shortGPT.config.languages import Language
from shortGPT.engine.content_short_engine import ContentShortEngine
class FactsShortEngine(ContentShortEngine):
    """Short engine whose script is a set of generated facts of a given type."""

    def __init__(
        self,
        voiceModule: VoiceModule,
        facts_type: str,
        background_video_name: str,
        background_music_name: str,
        short_id="",
        num_images=None,
        watermark=None,
        language: Language = Language.ENGLISH,
    ):
        """Forward the common settings to the parent engine and remember the facts type."""
        super().__init__(
            short_id=short_id,
            short_type="facts_shorts",
            background_video_name=background_video_name,
            background_music_name=background_music_name,
            num_images=num_images,
            watermark=watermark,
            language=language,
            voiceModule=voiceModule,
        )
        self._db_facts_type = facts_type

    def _generateScript(self):
        """Generate the script of the facts short with ``facts_gpt.generateFacts``."""
        facts_script = facts_gpt.generateFacts(self._db_facts_type)
        self._db_script = facts_script
================================================
FILE: shortGPT/engine/multi_language_translation_engine.py
================================================
import datetime
import os
import re
import shutil
from tqdm import tqdm
from shortGPT.audio.audio_duration import get_asset_duration
from shortGPT.audio.audio_utils import (audioToText, get_asset_duration,
run_background_audio_split,
speedUpAudio)
from shortGPT.audio.eleven_voice_module import VoiceModule
from shortGPT.config.languages import ACRONYM_LANGUAGE_MAPPING, Language
from shortGPT.editing_framework.editing_engine import (EditingEngine,
EditingStep)
from shortGPT.editing_utils.captions import (getCaptionsWithTime,
getSpeechBlocks)
from shortGPT.editing_utils.handle_videos import get_aspect_ratio
from shortGPT.engine.abstract_content_engine import CONTENT_DB, AbstractContentEngine
from shortGPT.gpt.gpt_translate import translateContent
class MultiLanguageTranslationEngine(AbstractContentEngine):
    """Content engine that re-voices a source video in a target language.

    The steps in ``stepDict`` transcribe the source audio, translate every
    speech block, synthesize a voice clip per translated block, render the
    video with the new audio (optionally with captions) and finally move the
    result into ``videos/``.
    """

    def __init__(self, voiceModule: VoiceModule, src_url: str = "", target_language: Language = Language.ENGLISH, use_captions=False, id=""):
        """Set up the engine.

        Args:
            voiceModule: voice backend used to synthesize the translated speech.
            src_url: source video location; only stored when no ``id`` is given.
            target_language: language to translate into.
            use_captions: whether captions are added to the rendered video.
            id: existing content id. When empty, the constructor arguments are
                written to the ``_db_*`` attributes.
        """
        super().__init__(id, "content_translation", target_language, voiceModule)
        if not id:
            self._db_should_translate = True
            if src_url:
                self._db_src_url = src_url
            self._db_use_captions = use_captions
            self._db_target_language = target_language.value

        self.stepDict = {
            1: self._transcribe_audio,
            2: self._translate_content,
            3: self._generate_translated_audio,
            4: self._edit_and_render_video,
            5: self._add_metadata
        }

    def _transcribe_audio(self):
        """Step 1: obtain the timed speech blocks and spoken language of the source.

        A finished translation of the same ``src_url`` is looked up first. When
        it carries both ``speech_blocks`` and ``original_language``, those values
        are reused for any of the two attributes that is still unset, and no
        transcription is run. Otherwise the audio is transcribed, translation
        is disabled when the source already is in the target language, and the
        voice module's remaining character quota is checked.

        Raises:
            Exception: if the voice module reports fewer remaining characters
                than the transcript contains.
        """
        cached_translation = CONTENT_DB.content_collection.find_one({
            "content_type": 'content_translation',
            'src_url': self._db_src_url,
            'ready_to_upload': True
        })
        has_cached_transcript = bool(
            cached_translation
            and 'speech_blocks' in cached_translation
            and 'original_language' in cached_translation
        )
        if has_cached_transcript:
            # Previously a cache hit loaded nothing, leaving the following steps
            # without speech blocks. Fill in only what is still missing.
            if not self._db_speech_blocks:
                self._db_speech_blocks = cached_translation['speech_blocks']
            if not self._db_original_language:
                self._db_original_language = cached_translation['original_language']
            return

        video_audio, _ = get_asset_duration(self._db_src_url, isVideo=False)
        self.verifyParameters(content_path=video_audio)
        self.logger(f"1/5 - Transcribing original audio to text...")
        whispered = audioToText(video_audio, model_size='base')
        self._db_speech_blocks = getSpeechBlocks(whispered, silence_time=0.8)
        self._db_original_language = whispered['language']

        # Source already in the target language: reuse the transcript as the "translation".
        if (ACRONYM_LANGUAGE_MAPPING.get(self._db_original_language) == Language(self._db_target_language)):
            self._db_translated_timed_sentences = self._db_speech_blocks
            self._db_should_translate = False

        expected_chars = len("".join([text for _, text in self._db_speech_blocks]))
        chars_remaining = self.voiceModule.get_remaining_characters()
        if chars_remaining < expected_chars:
            raise Exception(
                f"Your VoiceModule's key doesn't have enough characters to totally translate this video | Remaining: {chars_remaining} | Number of characters to translate: {expected_chars}")

    def _translate_content(self):
        """Step 2: translate every speech block, keeping its original timing."""
        if (self._db_should_translate):
            self.verifyParameters(_db_speech_blocks=self._db_speech_blocks)

            translated_timed_sentences = []
            for i, ((t1, t2), text) in tqdm(enumerate(self._db_speech_blocks), desc="Translating content"):
                self.logger(f"2/5 - Translating text content - {i+1} / {len(self._db_speech_blocks)}")
                translated_text = translateContent(text, self._db_target_language)
                translated_timed_sentences.append([[t1, t2], translated_text])
            self._db_translated_timed_sentences = translated_timed_sentences

    def _generate_translated_audio(self):
        """Step 3: synthesize one voice clip per translated block.

        Each clip is passed through ``speedUpAudio`` with the block's duration
        minus 0.05 s as the expected duration, and is stored together with the
        interval ``[t1, t1 + clip_duration]``.

        Raises:
            Exception: if the voice module returns no audio for a block.
        """
        self.verifyParameters(translated_timed_sentences=self._db_translated_timed_sentences)

        translated_audio_blocks = []
        for i, ((t1, t2), translated_text) in tqdm(enumerate(self._db_translated_timed_sentences), desc="Generating translated audio"):
            self.logger(f"3/5 - Generating translated audio - {i+1} / {len(self._db_translated_timed_sentences)}")
            translated_voice = self.voiceModule.generate_voice(translated_text, self.dynamicAssetDir+f"translated_{i}_{self._db_target_language}.wav")
            if not translated_voice:
                raise Exception('An error happending during audio voice creation')
            final_audio_path = speedUpAudio(translated_voice, self.dynamicAssetDir+f"translated_{i}_{self._db_target_language}_spedup.wav", expected_duration=t2-t1 - 0.05)
            _, translated_duration = get_asset_duration(final_audio_path, isVideo=False)
            translated_audio_blocks.append([[t1, t1+translated_duration], final_audio_path])
        self._db_audio_bits = translated_audio_blocks

    def _edit_and_render_video(self):
        """Step 4: lay the translated clips over the source video and render it.

        Gaps longer than 4 seconds between translated clips (and after the last
        one) are filled with the original audio. When captions are enabled they
        are generated from the rendered translated voice-over unless timed
        captions are already stored.
        """
        self.verifyParameters(_db_audio_bits=self._db_audio_bits)
        self.logger(f"4.1 / 5 - Preparing automated editing")
        target_language = Language(self._db_target_language)
        input_video, video_length = get_asset_duration(self._db_src_url)
        video_audio, _ = get_asset_duration(self._db_src_url, isVideo=False)
        editing_engine = EditingEngine()
        editing_engine.addEditingStep(EditingStep.ADD_BACKGROUND_VIDEO, {'url': input_video, "set_time_start": 0, "set_time_end": video_length})

        last_t2 = 0
        for (t1, t2), audio_path in self._db_audio_bits:
            t2 += -0.05
            editing_engine.addEditingStep(EditingStep.INSERT_AUDIO, {'url': audio_path, 'set_time_start': t1, 'set_time_end': t2})
            if t1 - last_t2 > 4:
                editing_engine.addEditingStep(EditingStep.EXTRACT_AUDIO, {"url": video_audio, "subclip": {"start_time": last_t2, "end_time": t1}, "set_time_start": last_t2, "set_time_end": t1})
            last_t2 = t2

        if video_length - last_t2 > 4:
            editing_engine.addEditingStep(EditingStep.EXTRACT_AUDIO, {"url": video_audio, "subclip": {"start_time": last_t2, "end_time": video_length}, "set_time_start": last_t2, "set_time_end": video_length})

        if self._db_use_captions:
            is_landscape = get_aspect_ratio(input_video) > 1
            if not self._db_timed_translated_captions:
                if not self._db_translated_voiceover_path:
                    self.logger(f"4.5 / 5 - Generating captions in {target_language.value}")
                    editing_engine.generateAudio(self.dynamicAssetDir+"translated_voiceover.wav")
                    self._db_translated_voiceover_path = self.dynamicAssetDir+"translated_voiceover.wav"
                whispered_translated = audioToText(self._db_translated_voiceover_path, model_size='base')
                timed_translated_captions = getCaptionsWithTime(whispered_translated, maxCaptionSize=50 if is_landscape else 15, considerPunctuation=True)
                # Captions longer than 4 seconds are dropped.
                self._db_timed_translated_captions = [[[t1, t2], text] for (t1, t2), text in timed_translated_captions if t2 - t1 <= 4]

            # The caption step depends only on orientation and language, so resolve it once.
            caption_key = "LANDSCAPE" if is_landscape else "SHORT"
            caption_key += "_ARABIC" if target_language == Language.ARABIC else ""
            for (t1, t2), text in self._db_timed_translated_captions:
                caption_type = getattr(EditingStep, f"ADD_CAPTION_{caption_key}")
                editing_engine.addEditingStep(caption_type, {'text': text, "set_time_start": t1, "set_time_end": t2})

        self._db_video_path = self.dynamicAssetDir+"translated_content.mp4"
        editing_engine.renderVideo(self._db_video_path, logger=self.logger if self.logger is not self.default_logger else None)

    def _add_metadata(self):
        """Step 5: move the render into ``videos/`` and flag it as ready to upload."""
        self.logger(f"5 / 5 - Saving translated video")
        # The destination folder may not exist yet on a fresh checkout.
        os.makedirs('videos', exist_ok=True)
        now = datetime.datetime.now()
        date_str = now.strftime("%Y-%m-%d_%H-%M-%S")
        newFileName = f"videos/{date_str} - " + \
            re.sub(r"[^a-zA-Z0-9 '\n\.]", '', f"translated_content_to_{self._db_target_language}")

        shutil.move(self._db_video_path, newFileName+".mp4")

        self._db_video_path = newFileName+".mp4"
        self._db_ready_to_upload = True
================================================
FILE: shortGPT/engine/reddit_short_engine.py
================================================
from shortGPT.audio.voice_module import VoiceModule
from shortGPT.config.asset_db import AssetDatabase
from shortGPT.config.languages import Language
from shortGPT.engine.content_short_engine import ContentShortEngine
from shortGPT.editing_framework.editing_engine import EditingEngine, EditingStep, Flow
from shortGPT.gpt import reddit_gpt, gpt_voice
import os
class RedditShortEngine(ContentShortEngine):
    """Short engine that builds a video around a generated Reddit question and story."""

    def __init__(
        self,
        voiceModule: VoiceModule,
        background_video_name: str,
        background_music_name: str,
        short_id="",
        num_images=None,
        watermark=None,
        language: Language = Language.ENGLISH,
    ):
        """Forward all settings to the parent engine with the ``reddit_shorts`` type."""
        super().__init__(
            short_id=short_id,
            short_type="reddit_shorts",
            background_video_name=background_video_name,
            background_music_name=background_music_name,
            num_images=num_images,
            watermark=watermark,
            language=language,
            voiceModule=voiceModule,
        )

    def __generateRandomStory(self):
        """Generate a Reddit question and return the script written for it."""
        return reddit_gpt.createRedditScript(reddit_gpt.getInterestingRedditQuestion())

    def __getRealisticStory(self, max_tries=3):
        """Generate stories and keep the one with the best realism score.

        Generation continues while the best score is below 6 and tries remain,
        or while the kept script is longer than 1000 characters.

        Returns:
            A ``(script, number_of_tries)`` tuple.
        """
        best_score = 0
        tries = 0
        best_script = ""
        while (best_score < 6 and tries < max_tries) or len(best_script) > 1000:
            candidate = self.__generateRandomStory()
            candidate_score = reddit_gpt.getRealisticness(candidate)
            if candidate_score >= best_score:
                best_script, best_score = candidate, candidate_score
            tries += 1
        return best_script, tries

    def _generateScript(self):
        """
        Implements Abstract parent method to generate the script for the reddit short
        """
        self.logger("Generating reddit question & entertaining story")
        story, _ = self.__getRealisticStory(max_tries=1)
        self._db_script = story
        self._db_reddit_question = reddit_gpt.getQuestionFromThread(self._db_script)

    def _prepareCustomAssets(self):
        """
        Override parent method to generate custom reddit image asset
        """
        self.logger("Rendering short: (3/4) preparing custom reddit image...")
        self.verifyParameters(question=self._db_reddit_question,)
        title, header, n_comments, n_upvotes = reddit_gpt.generateRedditPostMetadata(
            self._db_reddit_question)

        flow_values = {
            "username_text": header,
            "ncomments_text": n_comments,
            "nupvote_text": n_upvotes,
            "question_text": title
        }
        image_engine = EditingEngine()
        image_engine.ingestFlow(Flow.WHITE_REDDIT_IMAGE_FLOW, flow_values)
        image_engine.renderImage(self.dynamicAssetDir+"redditThreadImage.png")
        self._db_reddit_thread_image = self.dynamicAssetDir+"redditThreadImage.png"

    def _editAndRenderShort(self):
        """
        Override parent method to customize video rendering sequence by adding a Reddit image
        """
        self.verifyParameters(
            voiceover_audio_url=self._db_audio_path,
            video_duration=self._db_background_video_duration,
            music_url=self._db_background_music_url)

        render_target = self.dynamicAssetDir+"rendered_video.mp4"
        if not os.path.exists(render_target):
            self.logger("Rendering short: Starting automated editing...")
            editor = EditingEngine()

            editor.addEditingStep(EditingStep.ADD_VOICEOVER_AUDIO, {'url': self._db_audio_path})
            music_args = {
                'url': self._db_background_music_url,
                'loop_background_music': self._db_voiceover_duration,
                "volume_percentage": 0.11,
            }
            editor.addEditingStep(EditingStep.ADD_BACKGROUND_MUSIC, music_args)
            editor.addEditingStep(EditingStep.CROP_1920x1080, {'url': self._db_background_trimmed})
            editor.addEditingStep(
                EditingStep.ADD_SUBSCRIBE_ANIMATION,
                {'url': AssetDatabase.get_asset_link('subscribe animation')})

            if self._db_watermark:
                editor.addEditingStep(EditingStep.ADD_WATERMARK, {'text': self._db_watermark})

            editor.addEditingStep(EditingStep.ADD_REDDIT_IMAGE, {'url': self._db_reddit_thread_image})

            if self._db_language == Language.ARABIC.value:
                caption_step = EditingStep.ADD_CAPTION_SHORT_ARABIC
            else:
                caption_step = EditingStep.ADD_CAPTION_SHORT
            for span, caption in self._db_timed_captions:
                editor.addEditingStep(caption_step, {
                    'text': caption.upper(),
                    'set_time_start': span[0],
                    'set_time_end': span[1]})

            if self._db_num_images:
                for span, image_url in self._db_timed_image_urls:
                    editor.addEditingStep(EditingStep.SHOW_IMAGE, {
                        'url': image_url,
                        'set_time_start': span[0],
                        'set_time_end': span[1]})

            # Only forward a logger to the renderer when it is not the default one.
            render_logger = None if self.logger is self.default_logger else self.logger
            editor.renderVideo(render_target, logger=render_logger)

        self._db_video_path = render_target
================================================
FILE: shortGPT/gpt/README.md
================================================
# Module: gpt
The `gpt` module provides various functions for working with the OpenAI GPT-3 API. This module consists of multiple files, each serving a specific purpose. Let's take a look at each file and its contents.
## File: gpt_utils.py
This file contains utility functions used by other files in the module. Here are the functions defined in this file:
### `num_tokens_from_messages(texts, model="gpt-4o-mini")`
This function calculates the number of tokens used by a list of messages. It takes the `texts` parameter as input, which can be either a string or a list of strings, and returns the total number of tokens used (each text counts as its encoded length plus 4). Only the `gpt-4o-mini` model is currently implemented; any other `model` value raises `NotImplementedError`.
### `extract_biggest_json(string)`
This function extracts the largest JSON object from a string. It searches for JSON objects using a regular expression and returns the object with the maximum length.
### `get_first_number(string)`
This function searches for the first standalone whole number between 0 and 10 in a string and returns it as an integer, or `None` if the string contains no such number. It uses a regular expression to match the number.
### `load_yaml_file(file_path: str) -> dict`
This function reads and returns the contents of a YAML file as a dictionary. It takes the file path as input and uses the `yaml.safe_load()` function to parse the YAML file.
### `load_json_file(file_path)`
This function reads and returns the contents of a JSON file. It takes the file path as input and uses the `json.load()` function to parse the JSON file.
### `load_local_yaml_prompt(file_path)`
This function loads a YAML file containing chat and system prompts and returns the chat and system prompts as separate strings.
### `open_file(filepath)`
This function opens and reads a file and returns its contents as a string. It takes the file path as input and uses the `open()` function to read the file.
### `llm_completion(chat_prompt="", system="", temp=0.7, max_tokens=2000, remove_nl=True, conversation=None)`
This function performs a chat completion through the OpenAI client library. It takes parameters such as the chat prompt, system prompt, temperature, and maximum tokens. The model is not a parameter: a Gemini model is used when a `GEMINI_API_KEY` is configured, otherwise `gpt-4o-mini` is used when an `OPENAI_API_KEY` is configured. The request is attempted up to 5 times, and the generated text is returned.
## File: reddit_gpt.py
This file contains functions related to generating Reddit posts. Here are the functions defined in this file:
### `generateRedditPostMetadata(title)`
This function generates metadata for a Reddit post. It takes the post title as input and returns the title, header, number of comments, and number of upvotes.
### `getInterestingRedditQuestion()`
This function generates an interesting question for a Reddit post. It uses a YAML file containing chat and system prompts to generate the question.
### `createRedditScript(question)`
This function creates a Reddit script based on a given question. It uses a YAML file containing chat and system prompts to generate the script.
### `getRealisticness(text)`
This function calculates the realisticness score of a given text. It uses a YAML file containing chat and system prompts to generate the score.
### `getQuestionFromThread(text)`
This function extracts a question from a Reddit thread. It takes the thread text as input and uses a YAML file containing chat and system prompts to generate the question.
### `generateUsername()`
This function generates a username for a Reddit post. It uses a YAML file containing chat and system prompts to generate the username.
## File: gpt_translate.py
This file contains functions related to translating content using GPT-3. Here is the function defined in this file:
### `translateContent(content, language)`
This function translates the given content to the specified language. It takes the content and language as input and uses a YAML file containing chat and system prompts to perform the translation.
## File: facts_gpt.py
This file contains functions related to generating facts using GPT-3. Here are the functions defined in this file:
### `generateFacts(facts_type)`
This function generates facts of a specific type. It takes the facts type as input and uses a YAML file containing chat and system prompts to generate the facts.
### `generateFactSubjects(n)`
This function generates a list of fact subjects. It takes the number of subjects to generate as input and uses a YAML file containing chat and system prompts to generate the subjects.
## File: gpt_yt.py
This file contains functions related to generating YouTube video titles and descriptions using GPT-3. Here is the function defined in this file:
### `generate_title_description_dict(content)`
This function generates a title and description for a YouTube video based on the given content. It takes the content as input and uses a YAML file containing chat and system prompts to generate the title and description.
## File: gpt_editing.py
This file contains functions related to image and video editing using GPT-3. Here are the functions defined in this file:
### `getImageQueryPairs(captions, n=15, maxTime=2)`
This function generates pairs of image queries and their corresponding timestamps based on the given captions. It takes the captions, number of queries to generate, and maximum time between queries as input. It uses a YAML file containing chat prompts to generate the queries.
### `getVideoSearchQueriesTimed(captions_timed)`
This function generates timed video search queries based on the given captions with timestamps. It takes the captions with timestamps as input and uses a YAML file containing chat and system prompts to generate the queries.
## File: gpt_chat_video.py
This file contains functions related to generating chat video scripts using GPT-3. Here are the functions defined in this file:
### `generateScript(script_description, language)`
This function generates a script for a chat video based on the given description and language. It takes the script description and language as input and uses a YAML file containing chat and system prompts to generate the script.
### `correctScript(script, correction)`
This function corrects a script for a chat video based on the given original script and correction. It takes the original script and correction as input and uses a YAML file containing chat and system prompts to correct the script.
## File: gpt_voice.py
This file contains a function related to identifying the gender of a text using GPT-3. Here is the function defined in this file:
### `getGenderFromText(text)`
This function identifies the gender of a given text. It takes the text as input and uses a YAML file containing chat and system prompts to perform gender identification. It returns either "female" or "male" as the gender.
These are the functions and their descriptions provided by the `gpt` module. Each function serves a specific purpose and can be used to perform various tasks related to GPT-3.
================================================
FILE: shortGPT/gpt/__init__.py
================================================
from . import gpt_utils
from . import reddit_gpt
================================================
FILE: shortGPT/gpt/facts_gpt.py
================================================
from shortGPT.gpt import gpt_utils
import json
def generateFacts(facts_type):
    """Return LLM-generated facts for ``facts_type`` using the facts generator prompt."""
    chat_template, system_prompt = gpt_utils.load_local_yaml_prompt('prompt_templates/facts_generator.yaml')
    prompt = chat_template.replace("<>", facts_type)
    return gpt_utils.llm_completion(chat_prompt=prompt, system=system_prompt, temp=1.3)
def generateFactSubjects(n):
    """Ask the LLM for a JSON list of exactly ``n`` fact subjects.

    The completion is requested repeatedly until a response parses to a list
    of length ``n`` or the attempt budget (``int(1.5 * n)``) is exhausted.
    Single quotes in the response are replaced by double quotes before
    parsing.

    Args:
        n: number of subjects to generate.

    Returns:
        The parsed list of ``n`` subjects.

    Raises:
        Exception: if no response with exactly ``n`` subjects was obtained.
    """
    out = []
    chat, system = gpt_utils.load_local_yaml_prompt('prompt_templates/facts_subjects_generation.yaml')
    chat = chat.replace("<>", f"{n}")
    maxAttempts = int(1.5 * n)
    attempts = 0
    # Logical `and` is required here: the former bitwise `&` bound tighter than
    # the comparisons, which made the condition false on the very first check.
    while len(out) != n and attempts <= maxAttempts:
        result = gpt_utils.llm_completion(chat_prompt=chat, system=system, temp=1.69)
        attempts += 1
        try:
            parsed = json.loads(result.replace("'", '"'))
        except Exception as e:
            print(f"INFO - Failed generating {n} fact subjects after {attempts} trials", e)
            continue
        # Ignore valid JSON that is not a list (e.g. a bare number), which
        # would otherwise break the len() check above.
        if isinstance(parsed, list):
            out = parsed
    if len(out) != n:
        raise Exception(f"Failed to generate {n} subjects. In {attempts} attemps")
    return out
================================================
FILE: shortGPT/gpt/gpt_chat_video.py
================================================
from shortGPT.gpt import gpt_utils
import json
def generateScript(script_description, language):
    """Generate a video script from a description, in the requested language.

    The chat template's first ``<>`` placeholder receives the description and
    any following placeholder receives the language. The LLM is queried until
    its response parses as JSON with a non-empty ``script`` entry.

    Args:
        script_description: what the script should be about.
        language: language the script should be written in.

    Returns:
        The generated script text.
    """
    out = {'script': ''}
    chat, system = gpt_utils.load_local_yaml_prompt('prompt_templates/chat_video_script.yaml')
    # Chaining two .replace("<>", ...) calls lets the first one consume every
    # placeholder, so the language was never inserted. Fill them positionally.
    head, placeholder, tail = chat.partition("<>")
    if placeholder:
        chat = head + script_description + tail.replace("<>", language)
    while not ('script' in out and out['script']):
        try:
            result = gpt_utils.llm_completion(chat_prompt=chat, system=system, temp=1)
            out = json.loads(result)
        except Exception as e:
            print(e, "Difficulty parsing the output in gpt_chat_video.generateScript")
    return out['script']
def correctScript(script, correction):
    """Apply a requested correction to an existing script.

    The chat template's first ``<>`` placeholder receives the script and any
    following placeholder receives the correction. The LLM is queried until
    its response parses as JSON with a non-empty ``script`` entry.

    Args:
        script: the original script text.
        correction: description of the change to apply.

    Returns:
        The corrected script text.
    """
    out = {'script': ''}
    chat, system = gpt_utils.load_local_yaml_prompt('prompt_templates/chat_video_edit_script.yaml')
    # Chaining two .replace("<>", ...) calls lets the first one consume every
    # placeholder, so the correction was never inserted. Fill them positionally.
    head, placeholder, tail = chat.partition("<>")
    if placeholder:
        chat = head + script + tail.replace("<>", correction)
    while not ('script' in out and out['script']):
        try:
            result = gpt_utils.llm_completion(chat_prompt=chat, system=system, temp=1)
            out = json.loads(result)
        except Exception as e:
            # Name the right function and include the cause.
            print(e, "Difficulty parsing the output in gpt_chat_video.correctScript")
    return out['script']
================================================
FILE: shortGPT/gpt/gpt_editing.py
================================================
from shortGPT.gpt import gpt_utils
import json
def extractJsonFromString(text):
    """Parse the span from the first ``{`` to the last ``}`` of ``text`` as JSON.

    Raises:
        Exception: if ``text`` contains no ``{`` or no ``}``.
        json.JSONDecodeError: if the extracted span is not valid JSON.
    """
    first_brace = text.find('{')
    last_brace = text.rfind('}')
    if first_brace == -1 or last_brace == -1:
        raise Exception("Error: No JSON object found in response")
    return json.loads(text[first_brace:last_brace + 1])
def getImageQueryPairs(captions, n=15, maxTime=2):
    """Ask the LLM for timed image search queries matching the captions.

    Args:
        captions: timed captions; the end time of the last caption
            (``captions[-1][0][1]``) is used as the end of the audio.
        n: number of image queries requested in the prompt.
        maxTime: maximum number of seconds a single image stays on screen.

    Returns:
        A list of ``((start, end), query)`` tuples, where each query has
        ``" image"`` appended. An empty list is returned when the response
        cannot be processed.
    """
    chat, _ = gpt_utils.load_local_yaml_prompt('prompt_templates/editing_generate_images.yaml')
    # Chaining two .replace("<>", ...) calls lets the first one consume every
    # placeholder, so `n` was never inserted. Fill them positionally.
    head, placeholder, tail = chat.partition('<>')
    prompt = (head + f"{captions}" + tail.replace("<>", f"{n}")) if placeholder else chat

    try:
        # Get response and parse JSON
        res = gpt_utils.llm_completion(chat_prompt=prompt)
        data = extractJsonFromString(res)

        # Convert to pairs with time ranges
        pairs = []
        end_audio = captions[-1][0][1]
        image_queries = data["image_queries"]

        for i, item in enumerate(image_queries):
            timestamp = item["timestamp"]
            query = item["query"]

            # Skip invalid timestamps
            if timestamp <= 0 or timestamp >= end_audio:
                continue

            # An image ends after maxTime, or earlier if the next image (or the audio end) comes first.
            if i < len(image_queries) - 1:
                next_time = image_queries[i + 1]["timestamp"]
                end = min(timestamp + maxTime, next_time)
            else:
                end = min(timestamp + maxTime, end_audio)

            pairs.append(((timestamp, end), query + " image"))

        return pairs

    except json.JSONDecodeError:
        print("Error: Invalid JSON response from LLM")
        return []
    except KeyError:
        print("Error: Malformed JSON structure")
        return []
    except Exception as e:
        print(f"Error processing image queries: {str(e)}")
        return []
def getVideoSearchQueriesTimed(captions_timed):
    """
    Build timed video search queries from timed captions.

    The LLM is asked up to four times; the first response that yields at
    least one valid segment is returned as a list of
    ``[time_range, [query, query, query]]`` entries.

    Raises:
        Exception: when all four attempts fail; the message carries the last error.
    """
    last_error = ""
    for _attempt in range(4):
        try:
            # The end of the last caption bounds every valid time range.
            video_end = captions_timed[-1][0][1]

            chat_template, system_prompt = gpt_utils.load_local_yaml_prompt('prompt_templates/editing_generate_videos.yaml')
            prompt = chat_template.replace("<>", f"{captions_timed}")

            response = gpt_utils.llm_completion(chat_prompt=prompt, system=system_prompt)
            parsed = extractJsonFromString(response)

            results = []
            for segment in parsed["video_segments"]:
                time_range = segment["time_range"]
                queries = segment["queries"]

                in_bounds = 0 <= time_range[0] < time_range[1] <= video_end
                if not in_bounds:
                    continue

                # Pad with the last query up to three entries, then cap at three.
                for _pad in range(3 - len(queries)):
                    queries.append(queries[-1])
                results.append([time_range, queries[:3]])

            if not results:
                raise ValueError("Generated segments don't cover full video duration")
            return results
        except Exception as e:
            last_error = str(e)
            print(f"Error generating video search queries {last_error}")
    raise Exception(f"Failed to generate video search queries {last_error}")
================================================
FILE: shortGPT/gpt/gpt_translate.py
================================================
from shortGPT.gpt import gpt_utils
def translateContent(content, language):
    """Translate ``content`` into ``language`` with the translation prompt.

    For Arabic, the language instruction placed in the system prompt is
    extended with a request to shorten the translation to two thirds of the
    original length.

    Args:
        content: text to translate.
        language: target language name inserted into the system prompt.

    Returns:
        The translated text returned by the LLM.
    """
    chat, system = gpt_utils.load_local_yaml_prompt('prompt_templates/translate_content.yaml')
    if language == "arabic":
        # Assignment, not comparison: the former `==` discarded this instruction.
        language = "arabic, and make the translated text two third of the length of the original."
    system = system.replace("<>", language)
    chat = chat.replace("<>", content)
    result = gpt_utils.llm_completion(chat_prompt=chat, system=system, temp=1)
    return result
================================================
FILE: shortGPT/gpt/gpt_utils.py
================================================
import json
import os
import re
from time import sleep, time
import openai
import tiktoken
import yaml
from shortGPT.config.api_db import ApiKeyManager
def num_tokens_from_messages(texts, model="gpt-4o-mini"):
    """Return the token count of one text or a list of texts.

    Every text contributes its encoded length plus 4. The ``cl100k_base``
    encoding is used when ``tiktoken`` does not know ``model``.

    Raises:
        NotImplementedError: for any model other than ``gpt-4o-mini``.
    """
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        encoding = tiktoken.get_encoding("cl100k_base")

    if model != "gpt-4o-mini":  # note: future models may deviate from this
        raise NotImplementedError(f"""num_tokens_from_messages() is not presently implemented for model {model}.
See https://github.com/openai/openai-python/blob/main/chatml.md for information""")

    messages = [texts] if isinstance(texts, str) else texts
    return sum(4 + len(encoding.encode(message)) for message in messages)
def extract_biggest_json(string):
    """Return the longest balanced ``{...}`` span found in ``string``.

    The previous implementation relied on the recursive pattern ``(?R)``,
    which Python's ``re`` module does not support, so every call raised
    ``re.error``. This version scans for balanced braces instead. Braces are
    matched purely by nesting depth; braces inside JSON string values are not
    treated specially.

    Args:
        string: text that may contain one or more brace-delimited objects.

    Returns:
        The longest top-level balanced span (the first one on ties), or
        ``None`` when there is none.
    """
    best = None
    search_from = 0
    length = len(string)
    while True:
        start = string.find('{', search_from)
        if start == -1:
            break

        # Walk forward until the brace opened at `start` is closed again.
        depth = 0
        end = -1
        for index in range(start, length):
            char = string[index]
            if char == '{':
                depth += 1
            elif char == '}':
                depth -= 1
                if depth == 0:
                    end = index
                    break

        if end == -1:
            # Unbalanced opening brace: retry from the next character.
            search_from = start + 1
            continue

        candidate = string[start:end + 1]
        if best is None or len(candidate) > len(best):
            best = candidate
        search_from = end + 1
    return best
def get_first_number(string):
    """Return the first standalone integer from 0 to 10 in ``string``, or ``None``."""
    found = re.search(r'\b(0|[1-9]|10)\b', string)
    return int(found.group()) if found else None
def load_yaml_file(file_path: str) -> dict:
    """Read the YAML file at ``file_path`` and return its parsed contents."""
    raw_yaml = open_file(file_path)
    return yaml.safe_load(raw_yaml)
def load_json_file(file_path):
    """Read the UTF-8 JSON file at ``file_path`` and return the parsed data."""
    with open(file_path, mode='r', encoding='utf-8') as handle:
        return json.load(handle)
from pathlib import Path
def load_local_yaml_prompt(file_path):
    """Load a prompt template and return its ``(chat_prompt, system_prompt)`` pair.

    ``file_path`` is resolved relative to the parent directory of this
    module's directory.
    """
    module_dir = Path(__file__).parent
    template_path = (module_dir / '..' / file_path).resolve()
    template = load_yaml_file(str(template_path))
    chat_prompt = template['chat_prompt']
    system_prompt = template['system_prompt']
    return chat_prompt, system_prompt
def open_file(filepath):
    """Return the full text of the UTF-8 file at ``filepath``."""
    with open(filepath, mode='r', encoding='utf-8') as handle:
        contents = handle.read()
    return contents
from openai import OpenAI
def llm_completion(chat_prompt="", system="", temp=0.7, max_tokens=2000, remove_nl=True, conversation=None):
    """Run a chat completion and return the generated text.

    A Gemini key takes precedence over an OpenAI key; the model is fixed per
    provider. The request is attempted up to 5 times with a one second pause
    after each failure. Every successful exchange is written to
    ``.logs/gpt_logs``.

    Args:
        chat_prompt: user message; ignored when ``conversation`` is given.
        system: system message; ignored when ``conversation`` is given.
        temp: sampling temperature.
        max_tokens: maximum number of tokens to generate.
        remove_nl: when true, every whitespace run in the answer is collapsed
            to a single space.
        conversation: optional ready-made message list sent instead of the
            system/user pair.

    Returns:
        The stripped text of the first completion choice.

    Raises:
        Exception: when no API key is configured, or when every attempt fails.
    """
    openai_key = ApiKeyManager.get_api_key("OPENAI_API_KEY")
    gemini_key = ApiKeyManager.get_api_key("GEMINI_API_KEY")
    if gemini_key:
        client = OpenAI(
            api_key=gemini_key,
            base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
        )
        model = "gemini-2.0-flash-lite-preview-02-05"
    elif openai_key:
        client = OpenAI(api_key=openai_key)
        model = "gpt-4o-mini"
    else:
        raise Exception("No OpenAI or Gemini API Key found for LLM request")

    # The messages do not change between attempts, so build them once.
    if conversation:
        messages = conversation
    else:
        messages = [
            {"role": "system", "content": system},
            {"role": "user", "content": chat_prompt}
        ]

    max_retry = 5
    error = ""
    for _ in range(max_retry):
        try:
            response = client.chat.completions.create(
                model=model,
                messages=messages,
                max_tokens=max_tokens,
                temperature=temp,
                timeout=30
            )
            text = response.choices[0].message.content.strip()
            if remove_nl:
                # Raw string: '\s' in a plain literal is an invalid escape sequence.
                text = re.sub(r'\s+', ' ', text)
            filename = '%s_llm_completion.txt' % time()
            os.makedirs('.logs/gpt_logs', exist_ok=True)
            with open('.logs/gpt_logs/%s' % filename, 'w', encoding='utf-8') as outfile:
                outfile.write(f"System prompt: ===\n{system}\n===\n"+f"Chat prompt: ===\n{chat_prompt}\n===\n" + f'RESPONSE:\n====\n{text}\n===\n')
            return text
        except Exception as oops:
            print('Error communicating with OpenAI:', oops)
            error = str(oops)
            sleep(1)
    raise Exception(f"Error communicating with LLM Endpoint: completion errored more than {max_retry} times, last error: {error}")
================================================
FILE: shortGPT/gpt/gpt_voice.py
================================================
from shortGPT.gpt import gpt_utils
def getGenderFromText(text):
    """Ask the LLM for the narrator's gender of ``text``.

    Returns 'female' when the lower-cased reply contains 'female',
    otherwise 'male'.
    """
    chat_template, system_prompt = gpt_utils.load_local_yaml_prompt('prompt_templates/voice_identify_gender.yaml')
    user_prompt = chat_template.replace("<>", text)
    reply = gpt_utils.llm_completion(chat_prompt=user_prompt, system=system_prompt)
    normalized = reply.replace("\n", "").lower()
    return 'female' if 'female' in normalized else 'male'
================================================
FILE: shortGPT/gpt/gpt_yt.py
================================================
from shortGPT.gpt import gpt_utils
import json
def generate_title_description_dict(content):
    """Generate a YouTube title and description for ``content`` with the LLM.

    The LLM is queried repeatedly until the parsed JSON replies have supplied
    both a non-empty ``title`` and a non-empty ``description``; replies that
    cannot be parsed or indexed are ignored.

    Returns:
        A ``(title, description)`` tuple.
    """
    chat_template, system_prompt = gpt_utils.load_local_yaml_prompt('prompt_templates/yt_title_description.yaml')
    user_prompt = chat_template.replace("<>", f"{content}")
    fields = {"title": "", "description": ""}
    while fields["title"] == "" or fields["description"] == "":
        reply = gpt_utils.llm_completion(chat_prompt=user_prompt, system=system_prompt, temp=1)
        try:
            parsed = json.loads(reply)
            # Same order as the keys were declared: title first, then description.
            for key in ("title", "description"):
                if key in parsed:
                    fields[key] = parsed[key]
        except Exception:
            # Unusable reply: keep whatever was collected so far and ask again.
            pass
    return fields['title'], fields['description']
================================================
FILE: shortGPT/gpt/reddit_gpt.py
================================================
from shortGPT.gpt import gpt_utils
import random
import json
def generateRedditPostMetadata(title):
    """Build fake Reddit post metadata for ``title``.

    Returns:
        ``(title, header, comments, upvotes)`` where ``header`` is
        "<username> - <n> months ago" and the two counters are strings
        formatted with one decimal and a trailing "k". When the title starts
        with a double quote, every double quote is removed from it.
    """
    username = generateUsername()
    if title and title[:1] == '"':
        title = title.replace('"', '')
    months_ago = random.randint(1, 11)
    header = f"{username} - {months_ago} months ago"
    comments_k = random.random() * 10 + 2
    upvote_ratio = 1.2 + random.random() * 2.5
    upvotes_k = comments_k * upvote_ratio
    return title, header, f"{comments_k:.1f}k", f"{upvotes_k:.1f}k"
def getInterestingRedditQuestion():
    """Return a new AskReddit-style question produced by the LLM (temperature 1.08)."""
    chat_template, system_prompt = gpt_utils.load_local_yaml_prompt('prompt_templates/reddit_generate_question.yaml')
    question = gpt_utils.llm_completion(chat_prompt=chat_template, system=system_prompt, temp=1.08)
    return question
def createRedditScript(question):
    """Generate a story answering ``question`` and return the full script.

    The script is "Reddit, " followed by the question, a space, and the
    story returned by the LLM.
    """
    chat_template, system_prompt = gpt_utils.load_local_yaml_prompt('prompt_templates/reddit_generate_script.yaml')
    user_prompt = chat_template.replace("<>", question)
    intro = "Reddit, " + question
    story = gpt_utils.llm_completion(chat_prompt=user_prompt, system=system_prompt, temp=1.08)
    return intro + " " + story
def getRealisticness(text):
    """Ask the LLM to score how realistic the story ``text`` is.

    Up to five attempts are made; an attempt fails when the completion call
    raises or when the reply is not JSON containing a ``score`` key.

    Returns:
        The value stored under ``score`` in the LLM's JSON reply.

    Raises:
        Exception: If no attempt produced a usable score.
    """
    chat, system = gpt_utils.load_local_yaml_prompt('prompt_templates/reddit_filter_realistic.yaml')
    chat = chat.replace("<>", text)
    max_attempts = 5
    for _ in range(max_attempts):
        try:
            result = gpt_utils.llm_completion(chat_prompt=chat, system=system, temp=1)
            return json.loads(result)['score']
        except Exception as e:
            # Exceptions raised without arguments have empty ``args``; indexing
            # it blindly would raise IndexError here and abort the retries.
            print("Error in getRealisticness", e.args[0] if e.args else e)
    raise Exception("LLM Failed to generate a realisticness score on the script")
def getQuestionFromThread(text):
    """Return the AskReddit question that the thread transcript ``text`` answers.

    Fast path: when "Reddit, " is found before index 15 and the first "?"
    lies strictly between index 10 and 100, the question is the text before
    that "?", with "Reddit, " removed, stripped and capitalized.
    Otherwise the LLM extracts the question, and newlines, double quotes and
    question marks are removed from its reply.
    """
    # NOTE(review): str.find returns -1 when "Reddit, " is absent, which also
    # satisfies "< 15" - confirm whether that is intended.
    if ((text.find("Reddit, ") < 15) and (10 < text.find("?") < 100)):
        question = text.split("?")[0].replace("Reddit, ", "").strip().capitalize()
    else:
        # Use the question-extraction prompt. The realism-filter prompt that was
        # loaded here before asks the LLM for a JSON score, not for a question.
        chat, system = gpt_utils.load_local_yaml_prompt('prompt_templates/reddit_extract_question.yaml')
        chat = chat.replace("<>", text)
        question = gpt_utils.llm_completion(chat_prompt=chat, system=system).replace("\n", "")
        question = question.replace('"', '').replace("?", "")
    return question
def generateUsername():
    """Return an LLM-generated Reddit username with any "u/" removed."""
    chat_template, system_prompt = gpt_utils.load_local_yaml_prompt('prompt_templates/reddit_username.yaml')
    raw_name = gpt_utils.llm_completion(chat_prompt=chat_template, system=system_prompt, temp=1.2)
    return raw_name.replace("u/", "")
================================================
FILE: shortGPT/prompt_templates/__init__.py
================================================
================================================
FILE: shortGPT/prompt_templates/chat_video_edit_script.yaml
================================================
system_prompt: |
You are an expert video script writer / editor. You ONLY write text that is read. You only write the script that will be read by a voice actor for a video. The user will give you a script they have already written and the corrections they want you to make. From that, you will edit the script. Make sure to directly edit the script in response to the corrections given.
Your edited script will not have any reference to the audio footage / video footage shown. Only the text that will be narrated by the voice actor.
You will edit purely text.
Don't write any other textual thing than the text itself.
Make sure the text is not longer than 200 words (keep the video pretty short and neat).
# Output
You will output the edited script in a JSON format of this kind, and only a parsable JSON object
{"script": "did you know that ... ?"}
chat_prompt: |
Original script:
<>
Corrections:
<>
================================================
FILE: shortGPT/prompt_templates/chat_video_script.yaml
================================================
system_prompt: |
You are an expert video writer. You ONLY produce text that is read. You only produce the script that will be read by a voice actor for a video. The user will give you the description of the video they want you to make and from that, you will write the script. Make sure to directly write the script in response to the video description.
Your script will not have any reference to the audio footage / video footage shown. Only the text that will be narrated by the voice actor.
You will produce purely text.
Don't write any other textual thing than the text itself.
Make sure the text is not longer than 200 words (keep the video pretty short and neat).
# Output
You will output the script in a JSON format of this kind, and only a parsable JSON object
{"script": "did you know that ... ?"}
chat_prompt: |
Language: <>
Video description:
<>
================================================
FILE: shortGPT/prompt_templates/editing_generate_images.yaml
================================================
system_prompt: |
You are an AI specialized in generating precise image search queries for video editing. You must output ONLY valid JSON in the specified format, with no additional text.
chat_prompt: |
You are a shorts video editor. Your audience is people from 18 yo to 40yo. Your style of editing is pretty simple, you take the transcript of your short and put a very simple google image to illustrate the narrated sentences.
Each google image is searched with a short query of two words maximum. So let's say someone is talking about being sad, you would query on google `sad person` and show that image around that sentence.
I will give you a transcript which contains which words are shown at the screen, and the timestamps where they are shown. Understand the transcript, and time images at timestamps and, write me the query for each image. For the image queries you have two choices: concrete objects, like 'cash', 'old table', and other objects, or people in situations like 'sad person', 'happy family', etc... Generate a maximum of <> image queries equally distributed in the video.
Avoid depicting shocking or nude / crude images, since your video will get demonetized. The queries should bring images that represent objects and persons that are useful to understand the emotions and what is happening in the transcript. The queries should describe OBJECTS or PERSONS. So for something romantic, maybe a couple hugging, or a heart-shaped balloon.
The images should be an image representation of what is happening. Use places and real life people as image queries if you find any in the transcript. Avoid using overly generic queries like 'smiling man' that can bring up horror movie pictures; use the word 'person' instead. Try to use more specific words that describe the action or emotion in the scene.
IMPORTANT OUTPUT RULES:
1. NEVER use abstract nouns in the queries
2. ALWAYS use real objects or persons in the queries
3. Choose more objects than people
4. Generate exactly <> queries
5. Output must be valid JSON in this format:
{
"image_queries": [
{"timestamp": 1.0, "query": "happy person"},
{"timestamp": 3.2, "query": "red car"}
]
}
Transcript:
<>
Generate exactly <> evenly distributed image queries based on the transcript above. Output ONLY the JSON response, no additional text.
================================================
FILE: shortGPT/prompt_templates/editing_generate_videos.yaml
================================================
system_prompt: |
You are an AI specialized in generating precise video search queries for video editing. You must output ONLY valid JSON in the specified format, with no additional text.
chat_prompt: |
You are a video editor specializing in creating engaging visual content. Your task is to generate video search queries that will be used to find background footage that matches the narrative of the video.
For each time segment (4-5 seconds long), you need to suggest 3 alternative search queries that could be used to find appropriate video footage. Each query must be 1-2 words and should describe concrete, visual scenes or actions.
Guidelines for queries:
1. Use ONLY English words
2. Keep queries between 1-2 words
3. Focus on visual, concrete objects or actions
4. Avoid abstract concepts
5. Include both static and dynamic scenes
6. Ensure queries are family-friendly and safe for monetization
Good examples:
- "ocean waves"
- "typing keyboard"
- "city traffic"
Bad examples:
- "feeling sad" (abstract)
- "beautiful nature landscape morning sun" (too many words)
- "confused thoughts" (not visual)
The output must be valid JSON in this format:
{
"video_segments": [
{
"time_range": [0.0, 4.324],
"queries": ["coffee steam", "hot drink", "morning breakfast"]
},
{
"time_range": [4.324, 9.56],
"queries": ["office work", "desk computer", "typing hands"]
}
]
}
Timed captions:
<>
Generate video segments of 4-5 seconds covering the entire video duration.
Make sure to perfectly fit the end of the video, with the EXACT same floating point accuracy as in the transcript above.
Output ONLY the JSON response, no additional text.
================================================
FILE: shortGPT/prompt_templates/facts_generator.yaml
================================================
system_prompt: >
You are an expert content writer of a YouTube shorts channel. You specialize in `facts` shorts.
Your facts shorts are less than 50 seconds verbally ( around 140 words maximum). They are extremely captivating, and original.
The user will ask you a type of facts short and you will produce it.
For example, when the user asks:
`Weird facts`
You produce the following content script:
---
Weird facts you don't know.
A swarm of 20,000 bees followed a car for two days because their queen was stuck inside.
Crocodiles cannot stick their tongue out because it's attached to the roof of their mouths.
If you tickle a rat day after day, it will start laughing whenever it sees you.
In 2013, police in the Maldives arrested a coconut for loitering near a polling station for the presidential election.
Locals feared the coconut may have been imbued with a black magic spell to influence the election.
A Chinese farmer who always wanted to own his own plane built a full scale,
non-working replica of an Airbus A320 out of 50 tons of steel. It took him and his friends over two years and cost over $400,000.
When invited by a lady to spend a night with her, Benjamin Franklin asked to postpone until winter when nights were longer.
---
You are now tasked to produce the greatest short script depending on the user's request type of 'facts'.
Only give the first `hook`, like "Weird facts you don't know. " in the example. Then the facts.
Keep it short, extremely interesting and original.
chat_prompt: >
<>
================================================
FILE: shortGPT/prompt_templates/facts_subjects_generation.yaml
================================================
system_prompt: >
chat_prompt: >
For a series of <> youtube video about top 10 facts on a certain subject,
pick a random subject. Be very original. Put it in the '`Subject` facts' format.
Give the output in an array format that's JSON parseable, like ['Police facts', 'prison facts'].
Only give the array and nothing else.
================================================
FILE: shortGPT/prompt_templates/reddit_extract_question.yaml
================================================
system_prompt: |
From the transcript of a reddit ask, tell me the question in the title. The transcript always answers the question that a redditor asks in the title of the thread.
The question in the title must be a very short open-ended question that requires opinion/anecdotal-based answers. Examples of questions are:
---
What’s the worst part of having a child?
What screams “this person peaked in high school” to you?
What was your “it can’t be that easy / it was that easy” moment in your life?
---
Rules:
Most important rule : The question MUST be directed at the person reading it, the subject of the question should ALWAYS be the reader. It must contain 'you' or 'your', or something asking THEM their experience.
* The question is always very general, and then, people answer it with a specific anecdote that is related to that question. The question is always short and can bring spicy answers. By taking inspiration from the questions above, try to find the reddit thread question where we get the following anecdote.
* The question NEVER contains "I" as it is NOT answered by the person asking it.
* The question is NEVER too specific about a certain situation.
* The question should be as short and concise as possible. NEVER be too wordy, it must be fast and concise, and it doesn't matter if it's too general.
* The question must sound good to the ear, and bring interest. It should sound natural.
* The question must use the vocabulary of reddit users. Young, not too complicated, and very straight to the point.
* The question must be relatable for anyone, girl or guy.
The question should ALWAYS START with "What"
chat_prompt: |
-Transcript:
<>
The question should ALWAYS START with "What"
-Most probable very short and concise open-ended question from the transcript (50 characters MAXIMUM):
================================================
FILE: shortGPT/prompt_templates/reddit_filter_realistic.yaml
================================================
system_prompt: |
You are the judge of the story. Your goal will be to judge if it can possibly happen.
If it's possible and the story makes sense, then it's a 10, and if it's something that wouldn't ever happen in real life or something that doesn't make sense at all, it's a 0.
You have to be tolerant and keep in mind that the stories are sometimes very unlikely, but really happened, so you will only give a low score when something doesn't make sense in the story.
For parsing purposes, you will ALWAYS give the output as a JSON OBJECT with the key `score` and the value being a number between 1 and 10.
The output should be perfect parseable json, like:
{"score": 1.3}
chat_prompt: |
Story:
<>
Output:
================================================
FILE: shortGPT/prompt_templates/reddit_generate_question.yaml
================================================
system_prompt: |
You will write an interesting reddit ask thread question.
Instructions for the question:
The question must be a very short open-ended question that requires opinion/anecdotal-based answers. Examples of questions are:
---
What’s the worst part of having a child?
What screams “this person peaked in high school” to you?
What was your “it can’t be that easy / it was that easy” moment in your life?
Have you ever had a bad date turning into a good one?
---
Most important rule for questions : The question MUST be directed at the person reading it, the subject of the question should ALWAYS be the reader. It must contain 'you' or 'your', or something asking THEM their experience.
* The question is always very general, and then, people answer it with a specific anecdote that is related to that question. The question is always short and can bring spicy answers.
* The question NEVER contains 'I' as it is NOT answered by the person asking it.
* The question is NEVER too specific about a certain situation.
* The question should be as short and concise as possible. NEVER be too wordy, it must be fast and concise.
* The question must sound good to the ear, and bring interest. It should sound natural.
* The question must use the vocabulary of reddit users. Young, not too complicated, and very straight to the point.
The question must spark curiosity and interest, and must create very entertaining answers
* The question must be relatable for anyone, girl or guy.
* The question is maximum 80 characters long
chat_prompt: |
Totally new question:
================================================
FILE: shortGPT/prompt_templates/reddit_generate_script.yaml
================================================
system_prompt: |
Instructions for the new story:
You are a YouTube shorts content creator who makes extremely good YouTube shorts over answers from AskReddit questions. I'm going to give you a question, and you will give an anecdote as if you are a redditor who answered that question (narrated with 'I' in the first person). The anecdote you will create will be used in a YouTube short that will get 1 million views.
1- The story must be between 120 and 140 words MAXIMUM.
2- DO NOT end the story with a moral conclusion or any sort of conclusion that elongates the personal story. Just stop it when it makes sense.
3- Make sure that the story is very SPICY, very unusual, HIGHLY entertaining to listen to, not boring, and not a classic story that everyone tells.
4- Make sure that the new short's content is totally captivating and will bang with the YouTube algorithm.
5- Make sure that the story directly answers the title.
6- Make the question sound like an r/AskReddit question: open-ended and very interesting, very short and not too specific.
7- The language used in the story must be familiar and casual, like a normal person telling a story would use. Even youthful.
8- The story must be narrated as if you're a friend of the viewer telling them about the story.
9- Start the story with 'I'
chat_prompt: |
Reddit question: <>
-New Generated story. The story has to be highly unusual and spicy and must really surprise its listeners and hook them up to the story. Don't forget to make it between 120 and 140 words:
Reddit, <>
================================================
FILE: shortGPT/prompt_templates/reddit_story_filter.yaml
================================================
system_prompt: >
You're a judge of the realisticness of a story for a youtube short.
You must put yourself in the shoes of the youtube viewer hearing this story
and determine if it's totally nonsense.
Your goal will be to judge if it can possibly happen.
If it's possible and the story makes sense, then it's a 10,
and if it's something that wouldn't ever happen in real life or
something that doesn't make sense at all, it's a 0.
You have to be tolerant and keep in mind that the stories are meant to be unusual, they are sometimes very unlikely,
but really happened, so you will only give a low score when something doesn't make sense in the story.
For parsing purposes, you will ALWAYS give the output as a JSON OBJECT with the key
'score' and the value being the number between 1 to 10 and the key 'explanation'
with one sentence to explain why it's not. Make this explanation maximum 4 words.
The output should look like:
{"score": 4.5, "explanation": "some words..."}
Give perfect json with keys score and explanation, and nothing else.
chat_prompt: >
Story:
<>
Output:
================================================
FILE: shortGPT/prompt_templates/reddit_username.yaml
================================================
system_prompt: >
chat_prompt: >
Generate a random Reddit name with one or two numbers inside the name. Only generate one name, and don't output anything else. Make it sound natural. The name must be between 7 and 10 characters:
u/
================================================
FILE: shortGPT/prompt_templates/translate_content.yaml
================================================
system_prompt: >
You're an expert content translator to <>.
The user will give you any text in any language, and your task is to perfectly translate it to <>.
**
chat_prompt: >
<>
================================================
FILE: shortGPT/prompt_templates/voice_identify_gender.yaml
================================================
system_prompt: |
I will give you a narrated transcript and you must identify if it's most probably a male or female.
If you think the narrator is more probable to be a male, answer "male" and if you think it's female, say "female".
If you don't know, just say male.
chat_prompt: |
Transcript:
<>
Gender of narrator:
================================================
FILE: shortGPT/prompt_templates/yt_title_description.yaml
================================================
system_prompt: >
You are a youtube shorts title and description expert writer.
The user will give you the transcript of a youtube short, and you will create a title, and a description. In function of the audience, demography of viewers, you will adapt the title to be catchy.
Use only MAXIMUM 2 emojis in the title of the video ( very depending on the context, be careful)
and use hashtags in the description
The title has to be less than 80 characters (one small sentence of 10 words max)
And the description maximum 240 characters (keep it small)
You will give the title and description in a perfect json format. You will give nothing else but the perfect json object with key `title` and `description`
In your JSON, use the double quotes "" instead of ''
chat_prompt: >
<>
================================================
FILE: shortGPT/tracking/README.md
================================================
# Module: Tracking
## Goal
The `tracking` module is responsible for tracking and analyzing the usage and cost of various APIs used in the project. It includes two files: `api_tracking.py` and `cost_analytics.py`.
## File: api_tracking.py
### Class: APITracker
This class is responsible for tracking the usage of APIs and saving the data to a content manager.
#### Method: `__init__()`
- Initializes the APITracker object.
- Calls the `initiateAPITracking()` method.
#### Method: `setDataManager(contentManager: ContentDataManager)`
- Sets the content manager for storing the API usage data.
- Raises an exception if the content manager is null.
#### Method: `openAIWrapper(gptFunc)`
- Wrapper function for OpenAI API calls.
- Saves the API usage data to the content manager.
- Returns the result of the API call.
#### Method: `elevenWrapper(audioFunc)`
- Wrapper function for Eleven API calls.
- Saves the API usage data to the content manager.
- Returns the result of the API call.
#### Method: `wrap_turbo()`
- Wraps the `llm_completion` function from the `gpt_utils` module using the `openAIWrapper` method.
- Replaces the original function with the wrapped function.
#### Method: `wrap_eleven()`
- Wraps the `generateVoice` function from the `audio_generation` module using the `elevenWrapper` method.
- Replaces the original function with the wrapped function.
#### Method: `initiateAPITracking()`
- Initiates the tracking of APIs by wrapping the necessary functions using the `wrap_turbo` and `wrap_eleven` methods.
## File: cost_analytics.py
### Function: calculateCostAnalytics()
This function calculates the average usage and cost of OpenAI and Eleven APIs based on the data stored in the content database.
- Initializes the content database.
- Retrieves the API usage data from the database.
- Calculates the average usage and cost for OpenAI and Eleven APIs.
- Prints the results.
### Usage example:
```python
calculateCostAnalytics()
```
Note: The commented code at the end of the file is unrelated and can be ignored.
================================================
FILE: shortGPT/tracking/__init__.py
================================================
from . import api_tracking
================================================
FILE: shortGPT/tracking/api_tracking.py
================================================
from shortGPT.gpt import gpt_utils
from shortGPT.database.content_data_manager import ContentDataManager
import json
class APITracker:
    """Record LLM and voice-generation usage into a content data manager.

    On construction the tracker replaces ``gpt_utils.llm_completion`` and
    ``audio_generation.generateVoice`` with wrappers that, after each
    successful call, add the usage to the data manager set through
    ``setDataManager``.
    """

    # Class-level default so the wrappers are safe to call before
    # setDataManager() has been invoked (previously an AttributeError).
    datastore = None

    def __init__(self):
        self.initiateAPITracking()

    # The annotation is quoted so it is not evaluated when the class is defined.
    def setDataManager(self, contentManager: "ContentDataManager"):
        """Set the manager that receives usage data; raise if it is falsy."""
        if not contentManager:
            raise Exception("contentManager is null")
        self.datastore = contentManager

    def openAIWrapper(self, gptFunc):
        """Return ``gptFunc`` wrapped so its token usage is saved under 'api_openai'."""
        from functools import wraps

        @wraps(gptFunc)
        def wrapper(*args, **kwargs):
            result = gptFunc(*args, **kwargs)
            if self.datastore and result:
                # llm_completion takes its prompt as ``chat_prompt`` or
                # ``conversation``; ``prompt`` is kept for older callers. The
                # positional fallback is guarded so keyword-only calls do not
                # raise IndexError.
                prompt = (
                    kwargs.get('prompt')
                    or kwargs.get('chat_prompt')
                    or kwargs.get('conversation')
                    or (args[0] if args else "")
                )
                prompt = json.dumps(prompt)
                tokensUsed = gpt_utils.num_tokens_from_messages([prompt, result])
                self.datastore.save('api_openai', tokensUsed, add=True)
            return result
        return wrapper

    def elevenWrapper(self, audioFunc):
        """Return ``audioFunc`` wrapped so the input text length is saved under 'api_eleven'."""
        from functools import wraps

        @wraps(audioFunc)
        def wrapper(*args, **kwargs):
            result = audioFunc(*args, **kwargs)
            if self.datastore and result:
                textInput = kwargs.get('text') or (args[0] if args else "")
                self.datastore.save('api_eleven', len(textInput), add=True)
            return result
        return wrapper

    def wrap_turbo(self):
        """Replace ``gpt_utils.llm_completion`` with its tracking wrapper."""
        func_name = "llm_completion"
        # Patch the module imported at the top of this file. Importing a
        # top-level module named "gpt_utils" would not resolve to the
        # ``shortGPT.gpt.gpt_utils`` module that callers actually use.
        module = gpt_utils
        func = getattr(module, func_name)
        wrapped_func = self.openAIWrapper(func)
        setattr(module, func_name, wrapped_func)

    def wrap_eleven(self):
        """Replace ``audio_generation.generateVoice`` with its tracking wrapper."""
        func_name = "generateVoice"
        # NOTE(review): this imports a top-level module named "audio_generation";
        # confirm that name resolves in the deployed package layout.
        module = __import__("audio_generation", fromlist=["generateVoice"])
        func = getattr(module, func_name)
        wrapped_func = self.elevenWrapper(func)
        setattr(module, func_name, wrapped_func)

    def initiateAPITracking(self):
        """Install both tracking wrappers."""
        self.wrap_turbo()
        self.wrap_eleven()
================================================
FILE: shortGPT/tracking/cost_analytics.py
================================================
import numpy as np
from shortGPT.database.content_database import ContentDatabase
db = ContentDatabase()
# NOTE(review): `all` shadows the builtin and is never populated from `db`
# here; the name is kept so existing references still work. TODO: load the
# stored shorts into it.
all = []

# Calculate average and price of the average for OpenAI
OPENAI_CONST = 0.002 / 1000
# Shorts without a recorded value are skipped so mean/max only see numbers.
openai_array = [usage for usage in (short.get('api_openai') for short in all) if usage is not None]
# Guard the empty case: np.mean([]) yields nan and max([]) raises ValueError.
avr_openai = np.mean(openai_array) if openai_array else 0.0
price_openai = avr_openai * OPENAI_CONST
max_openai = max(openai_array, default=0)
price_max_openai = max_openai * OPENAI_CONST

# Calculate average and price of the average for Eleven
ELEVEN_CONST = 0.3 / 1000
# Read the 'api_eleven' key (the one the API tracker saves); this section
# previously re-read 'api_openai' and reported OpenAI numbers twice.
eleven_array = [usage for usage in (short.get('api_eleven') for short in all) if usage is not None]
avr_eleven = np.mean(eleven_array) if eleven_array else 0.0
price_eleven = avr_eleven * ELEVEN_CONST
max_eleven = max(eleven_array, default=0)
price_max_eleven = max_eleven * ELEVEN_CONST

# Print results
print("OpenAI:")
print("- Average:", avr_openai)
print("- Price of the average:", price_openai)
print("- Max:", max_openai)
print("- Price of the max:", price_max_openai)
print("Eleven:")
print("- Average:", avr_eleven)
print("- Price of the average:", price_eleven)
print("- Max:", max_eleven)
print("- Price of the max:", price_max_eleven)
# for id in ids:
# builder = AskingRedditorShortBuilder(AR, id)
# print(id, builder.dataManager.getVideoPath())
#createShorts(30, 'AskingRedditors')
# AR = ChannelManager("AskingRedditors")
# newShort = AskingRedditorShortBuilder(channelDB= AR, short_id="FyhKkqx9xDxTEtRpanSD")
# print(newShort.channelDB.getStaticEditingAsset('background_onepiece'))
# print(newShort.channelDB.getStaticEditingAsset('reddit_template_dark'))
# print(newShort.channelDB.getStaticEditingAsset('subscribe_animation'))
#print("Scraping requests remaining: ",image_api.getScrapingCredits())
================================================
FILE: shortGPT/utils/cli.py
================================================
from shortGPT.utils.requirements import Requirements
class CLI:
    """Console helpers: banner, help text, requirements report and ANSI-colored output."""

    class bcolors:
        """ANSI escape sequences used to style terminal text."""
        HEADER = '\033[95m'
        OKBLUE = '\033[94m'
        OKCYAN = '\033[96m'
        OKGREEN = '\033[92m'
        WARNING = '\033[93m'
        FAIL = '\033[91m'
        ENDC = '\033[0m'
        BOLD = '\033[1m'
        UNDERLINE = '\033[4m'

    @staticmethod
    def display_header():
        '''Print the banner and welcome line, then run the requirements check.'''
        banner = '''
.d88888b dP dP .88888. 888888ba d888888P .88888. 888888ba d888888P
88. "' 88 88 d8' `8b 88 `8b 88 d8' `88 88 `8b 88
`Y88888b. 88aaaaa88 88 88 88aaaa8P' 88 88 88aaaa8P' 88
`8b 88 88 88 88 88 `8b. 88 88 YP88 88 88
d8' .8P 88 88 Y8. .8P 88 88 88 Y8. .88 88 88
Y88888P dP dP `8888P' dP dP dP `88888' dP dP
'''
        CLI.display_green_text(banner)
        CLI.display_green_text("Welcome to ShortGPT! This is an experimental AI framework to automate all aspects of content creation.")
        print("")
        CLI.display_requirements_check()

    @staticmethod
    def display_help():
        '''Print the command-line usage text.'''
        for line in (
            "Usage: python shortGPT.py [options]",
            "",
            "Options:",
            " -h, --help show this help message and exit",
        ):
            print(line)

    @staticmethod
    def display_requirements_check():
        '''Print every requirement with its installed version, flagging missing ones in red.'''
        print("Checking requirements...")
        manager = Requirements()
        print(" - Requirements : List of requirements and installed version:")
        versions = manager.get_all_requirements_versions()
        for name in versions:
            installed = versions[name]
            if installed is None:
                CLI.display_red_text(f"---> Error : {name} is not installed")
            print(f"{name}=={installed}")
        print("")
        # The "all requirements installed" check is skipped for now: it assumes
        # a package has the same name as its Python import, which is often not
        # the case.

    @staticmethod
    def display_error(error_message, stack_trace):
        '''Print an error message in red, then the stack trace and support hints.'''
        print(CLI.get_console_red_text("ERROR : " + error_message))
        print(stack_trace)
        print("If the problem persists, don't hesitate to contact our support. We're here to assist you.")
        print("Get Help on Discord : https://discord.gg/qn2WJaRH")

    @staticmethod
    def get_console_text(text, color):
        '''Return ``text`` prefixed with ``color`` and followed by the reset code.'''
        return "".join((color, text, CLI.bcolors.ENDC))

    @staticmethod
    def get_console_green_text(text):
        '''Return ``text`` wrapped in the green color code.'''
        return CLI.get_console_text(text, CLI.bcolors.OKGREEN)

    @staticmethod
    def get_console_red_text(text):
        '''Return ``text`` wrapped in the red color code.'''
        return CLI.get_console_text(text, CLI.bcolors.FAIL)

    @staticmethod
    def get_console_yellow_text(text):
        '''Return ``text`` wrapped in the yellow color code.'''
        return CLI.get_console_text(text, CLI.bcolors.WARNING)

    @staticmethod
    def get_console_blue_text(text):
        '''Return ``text`` wrapped in the blue color code.'''
        return CLI.get_console_text(text, CLI.bcolors.OKBLUE)

    @staticmethod
    def get_console_bold_text(text):
        '''Return ``text`` wrapped in the bold code.'''
        return CLI.get_console_text(text, CLI.bcolors.BOLD)

    @staticmethod
    def get_console_underline_text(text):
        '''Return ``text`` wrapped in the underline code.'''
        return CLI.get_console_text(text, CLI.bcolors.UNDERLINE)

    @staticmethod
    def get_console_cyan_text(text):
        '''Return ``text`` wrapped in the cyan color code.'''
        return CLI.get_console_text(text, CLI.bcolors.OKCYAN)

    @staticmethod
    def get_console_header_text(text):
        '''Return ``text`` wrapped in the header color code.'''
        return CLI.get_console_text(text, CLI.bcolors.HEADER)

    @staticmethod
    def display_blue_text(text):
        '''Print ``text`` in blue.'''
        colored = CLI.get_console_blue_text(text)
        print(colored)

    @staticmethod
    def display_green_text(text):
        '''Print ``text`` in green.'''
        colored = CLI.get_console_green_text(text)
        print(colored)

    @staticmethod
    def display_red_text(text):
        '''Print ``text`` in red.'''
        colored = CLI.get_console_red_text(text)
        print(colored)

    @staticmethod
    def display_yellow_text(text):
        '''Print ``text`` in yellow.'''
        colored = CLI.get_console_yellow_text(text)
        print(colored)

    @staticmethod
    def display_bold_text(text):
        '''Print ``text`` in bold.'''
        colored = CLI.get_console_bold_text(text)
        print(colored)

    @staticmethod
    def display_underline_text(text):
        '''Print ``text`` underlined.'''
        colored = CLI.get_console_underline_text(text)
        print(colored)

    @staticmethod
    def display_cyan_text(text):
        '''Print ``text`` in cyan.'''
        colored = CLI.get_console_cyan_text(text)
        print(colored)

    @staticmethod
    def display_header_text(text):
        '''Print ``text`` in the header color.'''
        colored = CLI.get_console_header_text(text)
        print(colored)
================================================
FILE: shortGPT/utils/requirements.py
================================================
import os
import platform
class Requirements:
    '''Manage requirements for the project

    Reads the project's requirements.txt and reports on the packages it
    lists, plus basic information about the host OS and Python runtime.
    '''

    # Prefixes that mark a line as a URL/VCS requirement rather than a plain
    # package name. Full scheme markers are used so that regular packages whose
    # names merely begin with "git", "http" or "ssh" (gitpython, httpx,
    # sshtunnel, ...) are not mistaken for URLs.
    _URL_PREFIXES = ('git+', 'git://', 'git@', 'http://', 'https://', 'ssh://')

    def __init__(self):
        # requirements.txt is expected two directories above this module.
        self.package_path = os.path.dirname(os.path.realpath(__file__))
        self.requirements_path = os.path.join(self.package_path, '..', '..', 'requirements.txt')

    def get_list_requirements(self):
        '''Get the list of requirements packages from requirements.txt

        Blank lines, comment lines and trailing " #" inline comments are
        ignored. URL/VCS requirements (see _URL_PREFIXES) are skipped. For the
        remaining lines only the text after the last "/" is kept; version
        specifiers are left untouched.

        Returns:
            The requirement entries, sorted alphabetically.

        Raises:
            OSError: if requirements.txt cannot be opened.
        '''
        with open(self.requirements_path, encoding='utf-8') as f:
            raw_lines = f.read().splitlines()

        requirements = []
        for raw_line in raw_lines:
            line = raw_line.strip()
            # remove comments and empty lines
            if not line or line.startswith('#'):
                continue
            # drop a trailing inline comment ("package  # note")
            line = line.split(' #', 1)[0].strip()
            # URL/VCS requirements carry no plain package name: skip them
            if line.startswith(self._URL_PREFIXES):
                continue
            requirements.append(line.split('/')[-1])

        # sort alphabetically
        requirements.sort()
        return requirements

    def get_os_name(self):
        '''Get the name of the operating system'''
        return platform.system()

    def get_os_version(self):
        '''Get the version of the operating system'''
        return platform.version()

    def get_python_version(self):
        '''Get the version of Python installed'''
        return platform.python_version()

    def is_all_requirements_installed(self):
        '''Check if all requirements are installed

        Returns True only when every entry of get_list_requirements() passes
        is_requirement_installed().
        '''
        return all(
            self.is_requirement_installed(requirement)
            for requirement in self.get_list_requirements()
        )

    def is_requirement_installed(self, package_name):
        '''Check if a package is installed

        The check tries to import package_name as a module, so it only works
        when the import name equals the given name.

        Returns:
            True if the import succeeds, False if it raises ImportError.
        '''
        import importlib
        try:
            importlib.import_module(package_name)
        except ImportError:
            return False
        return True

    def get_version(self, package_name):
        '''Get the version of a package

        Returns:
            The installed version string, or None when the lookup fails
            (for example because the distribution is not found).
        '''
        import pkg_resources
        try:
            return pkg_resources.get_distribution(package_name).version
        except Exception:
            # Any lookup failure is reported as "no version"; unlike a bare
            # except, KeyboardInterrupt and SystemExit still propagate.
            return None

    def get_all_requirements_versions(self):
        '''Get the versions of all requirements

        Returns:
            A dict mapping each requirement entry to its installed version,
            or to None when get_version() could not resolve it.
        '''
        return {
            requirement: self.get_version(requirement)
            for requirement in self.get_list_requirements()
        }

    def get_all_requirements_not_installed(self):
        '''Get the list of all requirements not installed

        Returns:
            A dict whose keys are the requirement entries for which
            get_version() returned None; every value is None.
        '''
        # if version is None then the package is not installed
        return {
            requirement: None
            for requirement in self.get_list_requirements()
            if self.get_version(requirement) is None
        }
if __name__ == '__main__':
    # Script entry point: display information about the system and the
    # project's requirements.
    manager = Requirements()

    # The import-based check (is_all_requirements_installed) is skipped for
    # now: it assumes a package has the same name as its Python import, which
    # is often not the case.
    # if not manager.is_all_requirements_installed():
    #     print("Error : Some requirements are missing")
    #     print("Please install all requirements from requirements.txt")
    #     print("You can install them by running the following command:")
    #     print("pip install -r requirements.txt")

    print("System information:")
    system_report = (
        ("OS name", manager.get_os_name),
        ("OS version", manager.get_os_version),
        ("Python version", manager.get_python_version),
    )
    for label, getter in system_report:
        print(f"{label} : {getter()}")

    # every requirement together with the version that was resolved for it
    print("List of all requirements and their versions:")
    for name, version in manager.get_all_requirements_versions().items():
        print(f"{name}=={version}")

    # requirements for which no installed version could be resolved
    print("List of all requirements not installed:")
    for name, version in manager.get_all_requirements_not_installed().items():
        print(f"{name}=={version}")
================================================
FILE: videos/.gitignore
================================================
# Ignore everything in this directory
*
# Except this file
!.gitignore
!archive/