Repository: abi/autocommit
Branch: main
Commit: 7a4e9af425c7
Files: 13
Total size: 17.2 KB

Directory structure:
gitextract_ahig21ds/

├── .gitattributes
├── .gitignore
├── .vscode/
│   ├── launch.json
│   └── settings.json
├── LICENSE
├── README.md
├── autocommit/
│   ├── __init__.py
│   ├── commit.py
│   └── llm.py
├── null
├── pyproject.toml
├── requirements.txt
└── scan_repo.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitattributes
================================================
# Auto detect text files and perform LF normalization
* text=auto


================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
#   For a library or package, you might want to ignore these files since the code is
#   intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# poetry
#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
#   This is especially recommended for binary packages to ensure reproducibility, and is more
#   commonly ignored for libraries.
#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
#  and can be added to the global gitignore or merged into this file.  For a more nuclear
#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# Mac
.DS_Store


================================================
FILE: .vscode/launch.json
================================================
{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "name": "Python: Current File",
            "type": "python",
            "request": "launch",
            "program": "${file}",
            "console": "integratedTerminal",
            "justMyCode": true
        }
    ]
}

================================================
FILE: .vscode/settings.json
================================================
{
  "editor.tabCompletion": "on",
  "diffEditor.codeLens": true,
  "python.analysis.typeCheckingMode": "basic",
  "python.formatting.provider": "black"
}


================================================
FILE: LICENSE
================================================
MIT License

Copyright (c) 2022 Abi Raja

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: README.md
================================================
# aicommit - AI-generated Git commit messages

A simple CLI tool that generates 5 commit message suggestions for the changes in your current Git repo. After you pick and edit the commit message you want, it commits the changes.

![CleanShot 2023-01-05 at 15 55 47](https://user-images.githubusercontent.com/23818/211055859-7fa8b320-e2d6-41c4-ac29-7f441364666d.gif)

### Installation

**`pip install aicommit`**

On first run, it will prompt you for your OpenAI API key. Sign up for OpenAI if you haven't. Grab your API key by going to the dropdown on the top right, selecting "View API Keys" and creating a new key. Copy this key.

**NOTE:** It commits all changes in your current repo, including untracked and unstaged ones.

# Feedback/thoughts

Ping me on [Twitter](https://twitter.com/_abi_)

## scan_repo

`scan_repo` runs through all the commits in your repository to generate a CSV with AI-suggested commit messages side-by-side with your original commit messages. [Read more about this tool here](https://abiraja.substack.com/p/ai-generated-git-commit-messages)

To run scan_repo, copy `.env.example` to `.env` and add your OPENAI_KEY.

To choose the repo it runs on, set the `GIT_REPO_URL` environment variable (for example in `.env`); `scan_repo.py` reads it on startup and fails if it is not set.


# Publishing to pip

Version bump and clear out `dist/`

```
python3 -m build
twine check dist/*
twine upload dist/* --verbose
```


================================================
FILE: autocommit/__init__.py
================================================


================================================
FILE: autocommit/commit.py
================================================
from __future__ import print_function, unicode_literals
from PyInquirer import prompt as py_inquirer_prompt, style_from_dict, Token
import subprocess

import keyring

import os
import re
import markdown

from pydantic import BaseModel
from langchain.llms import OpenAI
from langchain.prompts import BasePromptTemplate
from dotenv import load_dotenv

# Load variables from a local .env file (python-dotenv) into the environment.
load_dotenv()

# API key read from the environment; None when OPENAI_KEY is not set.
# Used as the default `openai_api_key` argument of generate_suggestions().
OPENAI_KEY = os.getenv("OPENAI_KEY")


class CustomPromptTemplate(BasePromptTemplate, BaseModel):
    """Prompt template whose text is filled in with ``str.format``."""

    # Raw template text containing ``{name}`` placeholders.
    template: str

    def format(self, **kwargs) -> str:
        """Return ``template`` with its placeholders replaced by *kwargs*."""
        return self.template.format(**kwargs)


# Prompt sent to the model: asks for a markdown unordered list of 5 commit
# messages, best first, for the git diff substituted into {diff}.
prompt = CustomPromptTemplate(
    input_variables=["diff"],
    template="""
    What follows "-------" is a git diff for a potential commit.
    Reply with a markdown unordered list of 5 possible, different Git commit messages 
    (a Git commit message should be concise but also try to describe 
    the important changes in the commit), order the list by what you think 
    would be the best commit message first, and don't include any other text 
    but the 5 messages in your response.
    ------- 
    {diff}
    -------
""",
)


def generate_suggestions(diff, openai_api_key=OPENAI_KEY):
    """Ask OpenAI for commit message suggestions describing *diff*.

    Args:
        diff: Text of the git diff to describe.
        openai_api_key: API key handed to the OpenAI client; defaults to the
            module-level ``OPENAI_KEY``.

    Returns:
        A list of suggestion strings, one per list item found in the model's
        reply, in the order the model produced them. Empty when the reply
        contains no list items.
    """
    # Function-scope import keeps this block self-contained.
    from html import unescape

    llm = OpenAI(
        temperature=0.2,
        openai_api_key=openai_api_key,
        max_tokens=100,
        model_name="text-davinci-003",
    )  # type: ignore

    # query OpenAI
    formatted_prompt = prompt.format(diff=diff)
    response = llm(formatted_prompt)

    # Convert the markdown reply to HTML so the list items are easy to locate
    rendered = markdown.markdown(response)

    # Extract the list items. The markdown renderer HTML-escapes characters
    # such as & < >, so unescape them; otherwise a suggestion like
    # "Fix a & b" would be offered (and committed) as "Fix a &amp; b".
    return [unescape(item) for item in re.findall(r"<li>(.*?)</li>", rendered)]


# Service name under which the OpenAI API key is stored/looked up via keyring.
SERVICE_ID = "auto-commit-cli"


def prompt_for_openai_api_key():
    """Interactively ask for the OpenAI API key and store it via keyring.

    Returns:
        The key that was entered, with surrounding whitespace removed.

    Raises:
        SystemExit: If the prompt is cancelled or nothing is entered, so that
            an empty key is never saved (a stored empty key is not ``None``
            and would prevent ``main`` from prompting again).
    """
    questions = [
        {
            # "password" masks the secret instead of echoing it to the terminal
            "type": "password",
            "name": "openai_api_key",
            "message": "Please enter your OpenAI API key:",
        }
    ]
    answers = py_inquirer_prompt(questions)

    # The answer may be missing when the prompt is cancelled (e.g. Ctrl-C);
    # pasted keys frequently carry stray whitespace.
    openai_api_key = (answers.get("openai_api_key") or "").strip()
    if not openai_api_key:
        raise SystemExit("No OpenAI API key entered.")

    keyring.set_password(SERVICE_ID, "user", openai_api_key)
    return openai_api_key


def _run_git(*args):
    """Run ``git`` with *args* (no shell involved) and return the result.

    A missing ``git`` executable is reported as a failed process instead of
    an exception so callers can handle every failure through ``returncode``.
    """
    command = ["git", *args]
    try:
        return subprocess.run(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
    except FileNotFoundError:
        return subprocess.CompletedProcess(
            command, 127, b"", b"The 'git' executable was not found on PATH.\n"
        )


def _collect_diff():
    """Return ``(diff, error)`` for the working tree, untracked files included.

    Exactly one element is ``None``: ``error`` on success, ``diff`` on failure.
    """
    # NOTE: plain `git diff` covers unstaged changes only; changes that are
    # already staged are not part of the text returned here.
    tracked = _run_git("--no-pager", "diff")
    if tracked.returncode != 0:
        return None, tracked.stderr.decode("utf-8", errors="replace")

    # -z gives NUL-separated, unquoted paths, so names with spaces are safe.
    listing = _run_git("ls-files", "--others", "--exclude-standard", "-z")
    if listing.returncode != 0:
        return None, listing.stderr.decode("utf-8", errors="replace")

    chunks = [tracked.stdout]
    for raw_path in filter(None, listing.stdout.split(b"\0")):
        # Diffing against /dev/null renders an untracked file as an addition
        # (see https://stackoverflow.com/a/52093887).
        untracked = _run_git(
            "--no-pager",
            "diff",
            "--no-index",
            "--",
            "/dev/null",
            os.fsdecode(raw_path),
        )
        # With --no-index, exit status 1 only means "the inputs differ".
        if untracked.returncode not in (0, 1):
            return None, untracked.stderr.decode("utf-8", errors="replace")
        chunks.append(untracked.stdout)

    # errors="replace": a non-UTF-8 file must not crash the tool.
    return b"".join(chunks).decode("utf-8", errors="replace"), None


def main():
    """Entry point of the CLI: suggest commit messages, then commit.

    Steps: obtain the OpenAI API key (keyring, prompting if absent), collect
    the diff of the working tree, ask the model for suggestions, let the user
    pick and edit one, then run ``git add -A`` and ``git commit``.

    Exits with status 0 when there is nothing to do or the user cancels, and
    with a non-zero status when retrieving the diff, generating suggestions
    or committing fails.
    """
    import sys

    # Prompt for OpenAI API key if it's not set
    openai_api_key = keyring.get_password(SERVICE_ID, "user")
    if openai_api_key is None:
        openai_api_key = prompt_for_openai_api_key()

    diff, error = _collect_diff()
    if error is not None:
        print("There was an error retrieving the current diff: ")
        if "not a git repository" in error.lower():
            print("You're not inside a git repo. Please run it from inside a git repo.")
        else:
            print(error)
        sys.exit(-1)

    # Trim the diff
    diff = diff.strip()
    if not diff:
        print("Diff is empty. Nothing to commit.")
        sys.exit(0)

    try:
        # Only the first 7000 characters of the diff are sent to the model.
        suggestions = generate_suggestions(diff[:7000], openai_api_key=openai_api_key)
    except Exception as e:
        print("There was an error generating suggestions from OpenAI: ")
        # Prompt for OpenAI API key if it's incorrect
        if "Incorrect API key provided" in str(e):
            prompt_for_openai_api_key()
            print("Please re-run the command now.")
        else:
            print(e)
        sys.exit(-1)

    if not suggestions:
        print("No suggestions found.")
        sys.exit(0)

    # Prompt user with commit messages and choices and allow edits
    custom_style = style_from_dict(
        {
            Token.Separator: "#6C6C6C",
            Token.QuestionMark: "#000000",
            Token.Selected: "#FFFF00 bold",
            Token.Pointer: "#FF9D00 bold",
            Token.Instruction: "",
            Token.Answer: "#EA9104 bold",
            Token.Question: "",
        }
    )

    questions = [
        {
            "type": "list",
            "name": "commit_message",
            "message": "Commit message suggestions:",
            "choices": [f"{i + 1}. {item}" for i, item in enumerate(suggestions)],
            # Drop the "N. " prefix regardless of how many digits N has.
            "filter": lambda val: val.split(". ", 1)[1],
        }
    ]
    selection = py_inquirer_prompt(questions, style=custom_style)
    # The answer may be missing when the prompt is cancelled (e.g. Ctrl-C).
    if "commit_message" not in selection:
        print("No commit message selected. Nothing was committed.")
        sys.exit(0)

    confirmation = py_inquirer_prompt(
        [
            {
                "type": "input",
                "name": "final_commit_message",
                "message": "Confirm or edit the commit message:",
                "default": selection["commit_message"],
            },
        ]
    )
    message = (confirmation.get("final_commit_message") or "").strip()
    if not message:
        print("Empty commit message. Nothing was committed.")
        sys.exit(0)

    # Commit the changes. Arguments are passed as a list (no shell), so
    # quotes, `$(...)` or backticks in the message are taken literally.
    for git_args in (("add", "-A"), ("commit", "-m", message)):
        result = _run_git(*git_args)
        # Judge by exit status: git may print warnings to stderr on success
        # and reports e.g. "nothing to commit" on stdout on failure.
        if result.returncode != 0:
            print("There was an error committing:")
            print((result.stderr or result.stdout).decode("utf-8", errors="replace"))
            sys.exit(-1)

    print("Commit successful with message:", message)


# Run the CLI when this file is executed directly as a script.
if __name__ == "__main__":
    main()


================================================
FILE: autocommit/llm.py
================================================
import os
import re
import markdown

from pydantic import BaseModel
from langchain.llms import OpenAI
from langchain.prompts import BasePromptTemplate
from dotenv import load_dotenv

# Load variables from a local .env file (python-dotenv) into the environment.
load_dotenv()

# API key read from the environment; None when OPENAI_KEY is not set.
# Used as the default `openai_api_key` argument of generate_suggestions().
OPENAI_KEY = os.getenv("OPENAI_KEY")


class CustomPromptTemplate(BasePromptTemplate, BaseModel):
    """Prompt template whose text is filled in with ``str.format``."""

    # Raw template text containing ``{name}`` placeholders.
    template: str

    def format(self, **kwargs) -> str:
        """Return ``template`` with its placeholders replaced by *kwargs*."""
        return self.template.format(**kwargs)


# Prompt sent to the model: asks for a markdown unordered list of 5 commit
# messages, best first, for the git diff substituted into {diff}.
prompt = CustomPromptTemplate(
    input_variables=["diff"], template="""
    What follows "-------" is a git diff for a potential commit.
    Reply with a markdown unordered list of 5 possible, different Git commit messages 
    (a Git commit message should be concise but also try to describe 
    the important changes in the commit), order the list by what you think 
    would be the best commit message first, and don't include any other text 
    but the 5 messages in your response.
    ------- 
    {diff}
    -------
""")


def generate_suggestions(diff, openai_api_key=OPENAI_KEY):
    """Ask OpenAI for commit message suggestions describing *diff*.

    Args:
        diff: Text of the git diff to describe.
        openai_api_key: API key handed to the OpenAI client; defaults to the
            module-level ``OPENAI_KEY``.

    Returns:
        A list of suggestion strings, one per list item found in the model's
        reply, in the order the model produced them. Empty when the reply
        contains no list items.
    """
    # Function-scope import keeps this block self-contained.
    from html import unescape

    llm = OpenAI(temperature=0.2, openai_api_key=openai_api_key,
                 max_tokens=100, model_name="text-davinci-003")  # type: ignore

    # query OpenAI
    formatted_prompt = prompt.format(diff=diff)
    response = llm(formatted_prompt)

    # Convert the markdown reply to HTML so the list items are easy to locate
    rendered = markdown.markdown(response)

    # Extract the list items. The markdown renderer HTML-escapes characters
    # such as & < >, so unescape them; otherwise a suggestion like
    # "Fix a & b" would be returned as "Fix a &amp; b".
    return [unescape(item) for item in re.findall(r'<li>(.*?)</li>', rendered)]


================================================
FILE: null
================================================


================================================
FILE: pyproject.toml
================================================
[project]
name = "aicommit"
version = "0.1.0"
authors = [
  { name="Abi Raja", email="abimanyuraja@gmail.com" },
]
description = "Generate AI powered commit messages"
readme = "README.md"
requires-python = ">=3.7"
dependencies = [
    'async-timeout ~= 4.0',
    'black ~= 22.12.0',
    'certifi ~= 2022.12.7',
    'cffi ~= 1.15.1',
    'charset-normalizer ~= 2.1.1',
    'click ~= 8.1.3',
    'et-xmlfile ~= 1.1.0',
    'gitdb ~= 4.0.10',
    'GitPython ~= 3.1.29',
    'idna ~= 3.4',
    'importlib-metadata ~= 5.2.0',
    'jaraco.classes ~= 3.2.3',
    'keyring ~= 23.13.1',
    'langchain ~= 0.0.45',
    'Markdown ~= 3.4.1',
    'more-itertools ~= 9.0.0',
    'mypy-extensions ~= 0.4.3',
    'numpy ~= 1.24.0',
    'openai ~= 0.25.0',
    'openpyxl ~= 3.0.10',
    'pandas ~= 1.5.2',
    'pandas-stubs ~= 1.5.2.221213',
    'pathspec ~= 0.10.3',
    'platformdirs ~= 2.6.0',
    'prompt-toolkit ~= 1.0.14',
    'pycparser ~= 2.21',
    'pydantic ~= 1.10.2',
    'pygit2 ~= 1.11.1',
    'Pygments ~= 2.13.0',
    'PyInquirer ~= 1.0.3',
    'python-dateutil ~= 2.8.2',
    'python-dotenv ~= 0.21.0',
    'pytz ~= 2022.7',
    'PyYAML ~= 6.0',
    'redis ~= 4.4.0',
    'regex ~= 2022.10.31',
    'requests ~= 2.28.1',
    'six ~= 1.16.0',
    'smmap ~= 5.0.0',
    'SQLAlchemy ~= 1.4.45',
    'tomli ~= 2.0.1',
    'tqdm ~= 4.64.1',
    'types-pytz ~= 2022.7.0.0',
    'typing_extensions ~= 4.4.0',
    'urllib3 ~= 1.26.13',
    'wcwidth ~= 0.2.5',
    'zipp ~= 3.11.0'
]

[project.urls]
"Homepage" = "https://github.com/abi/autocommit"
"Bug Tracker" = "https://github.com/abi/autocommit/issues"

[project.scripts]
aicommit = "autocommit.commit:main"

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"


================================================
FILE: requirements.txt
================================================
async-timeout==4.0.2
black==22.12.0
certifi==2022.12.7
cffi==1.15.1
charset-normalizer==2.1.1
click==8.1.3
et-xmlfile==1.1.0
gitdb==4.0.10
GitPython==3.1.29
idna==3.4
importlib-metadata==5.2.0
jaraco.classes==3.2.3
keyring==23.13.1
langchain==0.0.45
Markdown==3.4.1
more-itertools==9.0.0
mypy-extensions==0.4.3
numpy==1.24.0
openai==0.25.0
openpyxl==3.0.10
pandas==1.5.2
pandas-stubs==1.5.2.221213
pathspec==0.10.3
platformdirs==2.6.0
prompt-toolkit==1.0.14
pycparser==2.21
pydantic==1.10.2
pygit2==1.11.1
Pygments==2.13.0
PyInquirer==1.0.3
python-dateutil==2.8.2
python-dotenv==0.21.0
pytz==2022.7
PyYAML==6.0
redis==4.4.0
regex==2022.10.31
requests==2.28.1
six==1.16.0
smmap==5.0.0
SQLAlchemy==1.4.45
tomli==2.0.1
tqdm==4.64.1
types-pytz==2022.7.0.0
typing_extensions==4.4.0
urllib3==1.26.13
wcwidth==0.2.5
zipp==3.11.0


================================================
FILE: scan_repo.py
================================================
from collections import namedtuple
import os
import shutil
import csv
import sys
from dotenv import load_dotenv
import git
import pygit2

from autocommit.llm import generate_suggestions

# Script: clone the repository named by GIT_REPO_URL, walk its history and
# write a CSV to stdout pairing each original commit message with the
# AI-suggested messages for that commit's diff.
load_dotenv()

# The repository to scan is taken from the environment (or a .env file).
GIT_REPO_URL = os.getenv("GIT_REPO_URL")
if GIT_REPO_URL is None:
    raise ValueError("GIT_REPO_URL is not set")

temp_repo_dir = "/tmp/ai-commit-msg-repo"

# Delete the directory if it exists
if os.path.exists(temp_repo_dir):
    shutil.rmtree(temp_repo_dir)

# Clone the repository
git.Repo.clone_from(GIT_REPO_URL, temp_repo_dir)

# Open the repository
repo = pygit2.Repository(temp_repo_dir)
commits = repo.walk(repo.head.target, pygit2.GIT_SORT_TIME)

# Iterate over the commits and organize the data we need
commit_objects = []
CommitObject = namedtuple("CommitObject", ["sha", "message", "diff"])
for commit in commits:
    if len(commit.parents) > 0:
        # Diff.patch can be None (e.g. for an empty commit); normalise to ""
        # so the slicing further down never fails on None.
        diff = repo.diff(commit.parents[0], commit).patch or ""
    else:
        # Commits without a parent get an empty diff.
        diff = ""
    commit_objects.append(CommitObject(commit.id, commit.message, diff))

filtered_commit_objects = commit_objects

writer = csv.writer(sys.stdout, quoting=csv.QUOTE_MINIMAL)

for commit in filtered_commit_objects:
    # Flatten the message onto one line; strip first so the message's trailing
    # newline does not turn into a dangling ";".
    message = commit.message.strip().replace("\n", ";")

    # Skip merge commits
    if message.startswith("Merge"):
        continue

    # text-davinci-003 supports 4000 tokens. Let's use up to 3500 tokens for the prompt.
    # 3500 tokens = 14,000 characters (https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them)
    # But in practice, > 7000 seems to exceed the limit
    suggestions = generate_suggestions(commit.diff[:7000])

    # generate CSV row
    for index, item in enumerate(suggestions):
        # Only include SHA & original message for the first row. Compare by
        # position, not by value: a later suggestion identical to the first
        # one must not repeat the SHA and message columns.
        if index == 0:
            writer.writerow([commit.sha, message, item])
        else:
            writer.writerow(["", "", item])