Repository: ahmedkhaleel2004/gitdiagram Branch: main Commit: 1e08d22dfd3b Files: 112 Total size: 230.4 KB Directory structure: gitextract_484_xfnt/ ├── .github/ │ ├── FUNDING.yml │ └── workflows/ │ ├── ci.yml │ └── deploy.yml ├── .gitignore ├── .nvmrc ├── CLAUDE.md ├── LICENSE ├── README.md ├── backend/ │ ├── .python-version │ ├── Dockerfile │ ├── app/ │ │ ├── __init__.py │ │ ├── core/ │ │ │ ├── errors.py │ │ │ └── observability.py │ │ ├── main.py │ │ ├── prompts.py │ │ ├── routers/ │ │ │ └── generate.py │ │ ├── services/ │ │ │ ├── github_service.py │ │ │ ├── mermaid_service.py │ │ │ ├── model_config.py │ │ │ ├── openai_service.py │ │ │ └── pricing.py │ │ └── utils/ │ │ └── format_message.py │ ├── deploy.sh │ ├── entrypoint.sh │ ├── nginx/ │ │ ├── api.conf │ │ └── setup_nginx.sh │ ├── package.json │ ├── pyproject.toml │ ├── scripts/ │ │ └── validate_mermaid.mjs │ └── tests/ │ ├── conftest.py │ ├── test_generate_router.py │ ├── test_generate_utils.py │ └── test_pricing.py ├── components.json ├── docker-compose.yml ├── docs/ │ ├── dev-setup.md │ └── railway-backend.md ├── drizzle.config.ts ├── eslint.config.mjs ├── next.config.js ├── package.json ├── postcss.config.js ├── prettier.config.js ├── src/ │ ├── app/ │ │ ├── [username]/ │ │ │ └── [repo]/ │ │ │ ├── page.tsx │ │ │ └── repo-page-client.tsx │ │ ├── _actions/ │ │ │ ├── cache.ts │ │ │ └── repo.ts │ │ ├── api/ │ │ │ ├── generate/ │ │ │ │ ├── cost/ │ │ │ │ │ └── route.ts │ │ │ │ └── stream/ │ │ │ │ └── route.ts │ │ │ └── healthz/ │ │ │ └── route.ts │ │ ├── layout.tsx │ │ ├── page.tsx │ │ └── providers.tsx │ ├── components/ │ │ ├── action-button.tsx │ │ ├── api-key-button.tsx │ │ ├── api-key-dialog.tsx │ │ ├── copy-button.tsx │ │ ├── export-dropdown.tsx │ │ ├── footer.tsx │ │ ├── header-client.tsx │ │ ├── header.tsx │ │ ├── hero.tsx │ │ ├── loading-animation.tsx │ │ ├── loading.tsx │ │ ├── main-card.tsx │ │ ├── mermaid-diagram.test.tsx │ │ ├── mermaid-diagram.tsx │ │ ├── private-repos-dialog.tsx │ │ ├── 
theme-toggle.tsx │ │ └── ui/ │ │ ├── button.tsx │ │ ├── card.tsx │ │ ├── dialog.tsx │ │ ├── input.tsx │ │ ├── progress.tsx │ │ ├── sonner.tsx │ │ ├── switch.tsx │ │ ├── textarea.tsx │ │ └── tooltip.tsx │ ├── env.js │ ├── features/ │ │ └── diagram/ │ │ ├── api.ts │ │ ├── export.ts │ │ ├── github-url.test.ts │ │ ├── github-url.ts │ │ ├── sse.test.ts │ │ ├── sse.ts │ │ └── types.ts │ ├── hooks/ │ │ ├── diagram/ │ │ │ ├── useDiagramExport.ts │ │ │ ├── useDiagramStream.test.ts │ │ │ └── useDiagramStream.ts │ │ ├── useDiagram.ts │ │ └── useStarReminder.tsx │ ├── lib/ │ │ ├── exampleRepos.ts │ │ └── utils.ts │ ├── server/ │ │ ├── db/ │ │ │ ├── index.ts │ │ │ └── schema.ts │ │ ├── generate/ │ │ │ ├── format.ts │ │ │ ├── github.ts │ │ │ ├── mermaid.test.ts │ │ │ ├── mermaid.ts │ │ │ ├── model-config.ts │ │ │ ├── openai.ts │ │ │ ├── pricing.test.ts │ │ │ ├── pricing.ts │ │ │ ├── prompts.ts │ │ │ └── types.ts │ │ └── github-stars.ts │ └── styles/ │ └── globals.css ├── start-database.sh ├── tailwind.config.ts ├── tsconfig.json ├── vitest.config.ts └── vitest.setup.ts ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/FUNDING.yml ================================================ # These are supported funding model platforms github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] patreon: # Replace with a single Patreon username open_collective: # Replace with a single Open Collective username ko_fi: ahmedkhaleel2004 tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry liberapay: # Replace with a single Liberapay username issuehunt: # Replace with a single IssueHunt username lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry polar: # Replace with a single Polar 
username buy_me_a_coffee: # Replace with a single Buy Me a Coffee username thanks_dev: # Replace with a single thanks.dev username custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] ================================================ FILE: .github/workflows/ci.yml ================================================ name: CI on: pull_request: push: branches: - main jobs: frontend: runs-on: ubuntu-latest env: POSTGRES_URL: postgresql://postgres:password@localhost:5432/gitdiagram steps: - uses: actions/checkout@v4 - name: Setup pnpm uses: pnpm/action-setup@v4 with: version: 10.30.0 - name: Setup Node.js uses: actions/setup-node@v4 with: node-version-file: ".nvmrc" cache: "pnpm" - name: Install dependencies run: pnpm install --frozen-lockfile - name: Lint run: pnpm lint - name: Typecheck run: pnpm typecheck - name: Frontend tests run: pnpm test - name: Build run: pnpm build backend: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Setup Python uses: actions/setup-python@v5 with: python-version: "3.12" - name: Install uv run: pip install uv==0.5.24 - name: Sync backend dependencies run: cd backend && uv sync --frozen --no-install-project - name: Import sanity run: cd backend && uv run python -m compileall app - name: Backend tests run: cd backend && uv run pytest -q ================================================ FILE: .github/workflows/deploy.yml ================================================ name: Deploy to EC2 on: # Disabled for automatic deploys after migrating to Next.js/Vercel backend. # Kept as legacy workflow for historical reference / manual fallback use. 
workflow_dispatch: inputs: confirm_legacy_ec2_deploy: description: "Type true to run legacy EC2 deploy" required: false default: "false" jobs: deploy: if: ${{ github.event.inputs.confirm_legacy_ec2_deploy == 'true' }} runs-on: ubuntu-latest # Add concurrency to prevent multiple deployments running at once concurrency: group: production cancel-in-progress: true steps: - uses: actions/checkout@v4 - name: Deploy to EC2 uses: appleboy/ssh-action@0ff4204d59e8e51228ff73bce53f80d53301dee2 # v1.2.5 with: host: ${{ secrets.EC2_HOST }} username: ubuntu key: ${{ secrets.EC2_SSH_KEY }} script: | cd ~/gitdiagram git fetch origin main git checkout main git pull --ff-only origin main sudo chmod +x ./backend/nginx/setup_nginx.sh sudo ./backend/nginx/setup_nginx.sh chmod +x ./backend/deploy.sh ./backend/deploy.sh ================================================ FILE: .gitignore ================================================ # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. # dependencies /node_modules /.pnp .pnp.js # testing /coverage # database /prisma/db.sqlite /prisma/db.sqlite-journal db.sqlite # next.js /.next/ /out/ next-env.d.ts # production /build # misc .DS_Store *.pem # debug npm-debug.log* yarn-debug.log* yarn-error.log* .pnpm-debug.log* # local env files # do not commit any .env files to git, except for the .env.example file. https://create.t3.gg/en/usage/env-variables#using-environment-variables .env .env*.local .env-e # vercel .vercel # typescript *.tsbuildinfo # idea files .idea __pycache__/ venv backend/.venv .venv # vscode .vscode/ ================================================ FILE: .nvmrc ================================================ 22 ================================================ FILE: CLAUDE.md ================================================ # CLAUDE.md This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. 
## Project Overview GitDiagram converts GitHub repositories into interactive Mermaid diagrams using a 3-stage LLM pipeline. It's a full-stack app with a Next.js frontend (Vercel) and FastAPI backend (Railway). ## Commands ### Frontend (pnpm, Node 22) ```bash pnpm install # Install dependencies pnpm dev # Start Next.js dev server (Turbo) pnpm build # Production build pnpm lint # ESLint pnpm check # Type-check + lint pnpm test # Vitest (frontend unit tests) pnpm format:write # Prettier formatting ``` ### Backend (Python 3.12, uv) ```bash cd backend uv sync --no-install-project # Install pinned deps into .venv uv run pytest -q # Run all backend tests uv run pytest tests/path/test_file.py::test_name # Run single test uv run python -m compileall app # Compile check ``` ### Database ```bash pnpm db:push # Push schema changes to Postgres pnpm db:generate # Generate Drizzle migration files pnpm db:studio # Open Drizzle Studio ``` ### Local Development ```bash # Start local Postgres ./start-database.sh # Start FastAPI backend (Docker, recommended for production parity) docker-compose up --build -d docker-compose logs -f api # OR start FastAPI backend directly pnpm dev:backend # runs uvicorn via uv ``` To route the Next.js frontend to a local FastAPI backend, set in `.env`: ``` NEXT_PUBLIC_USE_LEGACY_BACKEND=true NEXT_PUBLIC_API_DEV_URL=http://localhost:8000 ``` ## Architecture ### Dual-Backend Design The app supports two generation backends controlled by `NEXT_PUBLIC_USE_LEGACY_BACKEND`: - **FastAPI** (`backend/`) on Railway — primary production path - **Next.js Route Handlers** (`src/app/api/generate/`) — legacy fallback Both expose the same SSE streaming API. The frontend (`src/features/diagram/api.ts`) routes to one or the other transparently. ### 3-Stage LLM Pipeline Diagram generation uses three sequential OpenAI streaming calls: 1. **Explanation** — understands the repo structure 2. **Component Mapping** — maps components to file paths (XML tags extracted) 3. 
**Mermaid Diagram** — generates Mermaid syntax with click events After stage 3, Mermaid syntax is validated (via `backend/scripts/validate_mermaid.mjs` or `src/server/generate/mermaid.ts`) and auto-fixed for up to 3 attempts if invalid. Prompts live in `backend/app/prompts.py` and `src/server/generate/prompts.ts`. ### Streaming State Machine SSE events flow through states: `idle → started → explanation_* → mapping_* → diagram_* → diagram_fix_* → complete` Frontend: `src/hooks/diagram/useDiagramStream.ts` manages state. Backend: `backend/app/routers/generate.py` emits events. ### GitHub Authentication Priority 1. User-supplied PAT (from localStorage) 2. `GITHUB_PAT` env var 3. GitHub App (CLIENT_ID + PRIVATE_KEY + INSTALLATION_ID) ### Caching Generated diagrams are cached in PostgreSQL (`gitdiagram_diagram_cache` table, schema at `src/server/db/schema.ts`) keyed by `(username, repo)`. Server action: `src/app/_actions/cache.ts`. ### Path Aliases TypeScript uses `~/*` → `./src/*`. ## Key File Locations | Concern | Frontend | Backend | |---|---|---| | Prompts | `src/server/generate/prompts.ts` | `backend/app/prompts.py` | | GitHub client | `src/server/generate/github.ts` | `backend/app/services/github_service.py` | | OpenAI streaming | `src/server/generate/openai.ts` | `backend/app/services/openai_service.py` | | Mermaid validation | `src/server/generate/mermaid.ts` | `backend/app/services/mermaid_service.py` | | Stream endpoint | `src/app/api/generate/stream/` | `backend/app/routers/generate.py` | | DB schema | `src/server/db/schema.ts` | — | | Frontend API client | `src/features/diagram/api.ts` | — | | Main diagram hook | `src/hooks/useDiagram.ts` | — | ## Environment Variables Minimum required (see `.env.example` for full list): - `POSTGRES_URL` — Neon serverless Postgres - `OPENAI_API_KEY` — used for all generation stages - `GITHUB_PAT` — optional but avoids GitHub rate limits - `OPENAI_MODEL` — single model for all three pipeline stages 
================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2024 Ahmed Khaleel Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ [![Image](./docs/readme_img.png "GitDiagram Front Page")](https://gitdiagram.com/) ![License](https://img.shields.io/badge/license-MIT-blue.svg) [![Kofi](https://img.shields.io/badge/Kofi-F16061.svg?logo=ko-fi&logoColor=white)](https://ko-fi.com/ahmedkhaleel2004) # GitDiagram Turn any GitHub repository into an interactive diagram for visualization in seconds. You can also replace `hub` with `diagram` in any GitHub URL to access its diagram. 
## 🚀 Features - 👀 **Instant Visualization**: Convert any GitHub repository structure into a system design / architecture diagram - 🎨 **Interactivity**: Click on components to navigate directly to source files and relevant directories - ⚡ **Fast Generation**: Powered by OpenAI GPT-5.4 mini (configurable) for quick and accurate diagrams - 🖼️ **Export Options**: Copy Mermaid code or download the generated diagram as PNG - 🌐 **API Access**: Public API available for integration (WIP) ## ⚙️ Tech Stack - **Frontend**: Next.js, TypeScript, Tailwind CSS, ShadCN - **Backend**: FastAPI (Railway), with Next.js Route Handlers available as a fallback path - **Database**: PostgreSQL (with Drizzle ORM) - **AI**: OpenAI GPT-5.4 mini (via `OPENAI_MODEL`) - **Deployment**: Vercel (frontend) + Railway (backend) - **CI/CD**: GitHub Actions - **Analytics**: PostHog, Api-Analytics ## 🔄 Backend Architecture Update GitDiagram now runs its primary generation backend on FastAPI (deployed on Railway). Frontend calls are routed to the external backend by setting: - `NEXT_PUBLIC_USE_LEGACY_BACKEND=true` - `NEXT_PUBLIC_API_DEV_URL=https://<your-railway-backend-domain>` The variable name contains "LEGACY" for backward compatibility, but it now points to the primary external backend in production. ## 🤔 About I created this because I wanted to contribute to open-source projects but quickly realized their codebases are too massive for me to dig through manually, so this helps me get started - but it's definitely got many more use cases! Given any public (or private!) GitHub repository, it generates diagrams in Mermaid.js with OpenAI's GPT-5.4 mini! (Previously Claude 3.5 Sonnet) I extract information from the file tree and README for details and interactivity (you can click components to be taken to relevant files and directories). Most of what you might call the "processing" of this app is done with prompt engineering and a 3-step streaming pipeline in the FastAPI backend under `/backend`. 
## 🔒 How to diagram private repositories You can simply click on "Private Repos" in the header and follow the instructions by providing a GitHub personal access token with the `repo` scope. You can also self-host this app locally (backend separated as well!) with the steps below. ## 🛠️ Self-hosting / Local Development 1. Clone the repository ```bash git clone https://github.com/ahmedkhaleel2004/gitdiagram.git cd gitdiagram ``` 2. Install dependencies ```bash pnpm i ``` 3. Set up environment variables (create .env) ```bash cp .env.example .env ``` Then edit the `.env` file with your OpenAI API key and optional GitHub personal access token. 4. Start local database ```bash chmod +x start-database.sh ./start-database.sh ``` When prompted to generate a random password, input yes. The Postgres database will start in a container at `localhost:5432` 5. Initialize the database schema ```bash pnpm db:push ``` You can view and interact with the database using `pnpm db:studio` 6. Run frontend ```bash pnpm dev ``` You can now access the website at `localhost:3000`. Run FastAPI backend (recommended if you want parity with production): ```bash docker-compose up --build -d docker-compose logs -f api ``` To route frontend calls to the external backend, set: - `NEXT_PUBLIC_USE_LEGACY_BACKEND=true` - `NEXT_PUBLIC_API_DEV_URL=http://localhost:8000` For a full machine setup guide (Node/Python/uv versions + verification), see `docs/dev-setup.md`. Quick validation: ```bash pnpm check pnpm test pnpm build ``` Railway backend docs: `docs/railway-backend.md`. ## Contributing Contributions are welcome! Please feel free to submit a Pull Request. ## Acknowledgements Shoutout to [Romain Courtois](https://github.com/cyclotruc)'s [Gitingest](https://gitingest.com/) for inspiration and styling ## 🤔 Future Steps - Implement font-awesome icons in diagram - Implement an embedded feature like star-history.com but for diagrams. The diagram could also be updated progressively as commits are made. 
================================================ FILE: backend/.python-version ================================================ 3.12 ================================================ FILE: backend/Dockerfile ================================================ FROM node:22.12.0-slim AS node-runtime FROM python:3.12-slim WORKDIR /app ENV ENVIRONMENT=production ENV PORT=8000 COPY --from=node-runtime /usr/local/bin/node /usr/local/bin/node COPY --from=node-runtime /usr/local/lib/node_modules /usr/local/lib/node_modules RUN ln -s /usr/local/lib/node_modules/npm/bin/npm-cli.js /usr/local/bin/npm && \ ln -s /usr/local/lib/node_modules/npm/bin/npx-cli.js /usr/local/bin/npx # Install uv inside the image COPY --from=ghcr.io/astral-sh/uv:0.5.24 /uv /uvx /bin/ # Copy dependency manifests first for better layer caching COPY pyproject.toml uv.lock ./ COPY package.json ./ COPY package-lock.json ./ # Install pinned runtime dependencies from uv lockfile RUN uv sync --frozen --no-dev --no-install-project RUN npm ci --omit=dev # Copy application code COPY . . 
RUN chmod +x /app/entrypoint.sh && \ sed -i 's/\r$//' /app/entrypoint.sh && \ ls -la /app/entrypoint.sh EXPOSE 8000 CMD ["/bin/bash", "/app/entrypoint.sh"] ================================================ FILE: backend/app/__init__.py ================================================ ================================================ FILE: backend/app/core/errors.py ================================================ from __future__ import annotations def api_error(code: str, message: str, **extra): payload = { "ok": False, "error": message, "error_code": code, } payload.update(extra) return payload def api_success(**data): payload = {"ok": True} payload.update(data) return payload ================================================ FILE: backend/app/core/observability.py ================================================ from __future__ import annotations import json import logging import time logger = logging.getLogger("gitdiagram.api") if not logger.handlers: logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") def log_event(event: str, **fields): logger.info( json.dumps( { "event": event, **fields, }, default=str, ) ) class Timer: def __init__(self): self.start = time.perf_counter() def elapsed_ms(self) -> int: return int((time.perf_counter() - self.start) * 1000) ================================================ FILE: backend/app/main.py ================================================ import os from api_analytics.fastapi import Analytics from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware from app.core.errors import api_success from app.core.observability import log_event from app.routers import generate app = FastAPI() cors_origins = os.getenv("CORS_ORIGINS") if cors_origins: origins = [origin.strip() for origin in cors_origins.split(",") if origin.strip()] else: origins = [ "http://localhost:3000", "https://gitdiagram.com", "https://www.gitdiagram.com", ] app.add_middleware( CORSMiddleware, 
allow_origins=origins, allow_credentials=True, allow_methods=["GET", "POST", "OPTIONS"], allow_headers=["*"], ) api_analytics_key = os.getenv("API_ANALYTICS_KEY") if api_analytics_key: app.add_middleware(Analytics, api_key=api_analytics_key) app.include_router(generate.router) @app.get("/") async def root(): return api_success(message="Hello from GitDiagram API!") @app.get("/healthz") async def healthz(): log_event("healthz.ok") return api_success(status="ok") ================================================ FILE: backend/app/prompts.py ================================================ # This is our processing. This is where GitDiagram makes the magic happen # There is a lot of DETAIL we need to extract from the repository to produce detailed and accurate diagrams # I will immediately put out there that I'm trying to reduce costs. Theoretically, I could, for like 5x better accuracy, include most file content as well which would make for perfect diagrams, but thats too many tokens for my wallet, and would probably greatly increase generation time. (maybe a paid feature?) # THE PROCESS: # imagine it like this: # def prompt1(file_tree, readme) -> explanation of diagram # def prompt2(explanation, file_tree) -> maps relevant directories and files to parts of diagram for interactivity # def prompt3(explanation, map) -> Mermaid.js code # Note: Originally prompt1 and prompt2 were combined - but I tested it, and turns out mapping relevant dirs and files in one prompt along with generating detailed and accurate diagrams was difficult for Claude 3.5 Sonnet. It lost detail in the explanation and dedicated more "effort" to the mappings, so this is now its own prompt. # This is my first take at prompt engineering so if you have any ideas on optimizations please make an issue on the GitHub! SYSTEM_FIRST_PROMPT = """ You are tasked with explaining to a principal software engineer how to draw the best and most accurate system design diagram / architecture of a given project. 
This explanation should be tailored to the specific project's purpose and structure. To accomplish this, you will be provided with two key pieces of information: 1. The complete and entire file tree of the project including all directory and file names, which will be enclosed in <file_tree> tags in the users message. 2. The README file of the project, which will be enclosed in <readme> tags in the users message. Analyze these components carefully, as they will provide crucial information about the project's structure and purpose. Follow these steps to create an explanation for the principal software engineer: 1. Identify the project type and purpose: - Examine the file structure and README to determine if the project is a full-stack application, an open-source tool, a compiler, or another type of software imaginable. - Look for key indicators in the README, such as project description, features, or use cases. 2. Analyze the file structure: - Pay attention to top-level directories and their names (e.g., "frontend", "backend", "src", "lib", "tests"). - Identify patterns in the directory structure that might indicate architectural choices (e.g., MVC pattern, microservices). - Note any configuration files, build scripts, or deployment-related files. 3. Examine the README for additional insights: - Look for sections describing the architecture, dependencies, or technical stack. - Check for any diagrams or explanations of the system's components. 4. Based on your analysis, explain how to create a system design diagram that accurately represents the project's architecture. Include the following points: a. Identify the main components of the system (e.g., frontend, backend, database, building, external services). b. Determine the relationships and interactions between these components. c. Highlight any important architectural patterns or design principles used in the project. d. Include relevant technologies, frameworks, or libraries that play a significant role in the system's architecture. 
5. Provide guidelines for tailoring the diagram to the specific project type: - For a full-stack application, emphasize the separation between frontend and backend, database interactions, and any API layers. - For an open-source tool, focus on the core functionality, extensibility points, and how it integrates with other systems. - For a compiler or language-related project, highlight the different stages of compilation or interpretation, and any intermediate representations. 6. Instruct the principal software engineer to include the following elements in the diagram: - Clear labels for each component - Directional arrows to show data flow or dependencies - Color coding or shapes to distinguish between different types of components 7. NOTE: Emphasize the importance of being very detailed and capturing the essential architectural elements. Don't overthink it too much, simply separating the project into as many components as possible is best. Present your explanation and instructions within <explanation> tags, ensuring that you tailor your advice to the specific project based on the provided file tree and README content. """ # - A legend explaining any symbols or abbreviations used # ^ removed since it was making the diagrams very long # just adding some clear separation between the prompts # ************************************************************ # ************************************************************ SYSTEM_SECOND_PROMPT = """ You are tasked with mapping key components of a system design to their corresponding files and directories in a project's file structure. You will be provided with a detailed explanation of the system design/architecture and a file tree of the project. First, carefully read the system design explanation which will be enclosed in <explanation> tags in the users message. Then, examine the file tree of the project which will be enclosed in <file_tree> tags in the users message. 
Your task is to analyze the system design explanation and identify key components, modules, or services mentioned. Then, try your best to map these components to what you believe could be their corresponding directories and files in the provided file tree. Guidelines: 1. Focus on major components described in the system design. 2. Look for directories and files that clearly correspond to these components. 3. Include both directories and specific files when relevant. 4. If a component doesn't have a clear corresponding file or directory, simply dont include it in the map. Now, provide your final answer in the following format: <component_mapping> 1. [Component Name]: [File/Directory Path] 2. [Component Name]: [File/Directory Path] [Continue for all identified components] </component_mapping> Remember to be as specific as possible in your mappings, only use what is given to you from the file tree, and to strictly follow the components mentioned in the explanation. """ # ❌ BELOW IS A REMOVED SECTION FROM THE ABOVE PROMPT USED FOR CLAUDE 3.5 SONNET # Before providing your final answer, use the <scratchpad> to think through your process: # 1. List the key components identified in the system design. # 2. For each component, brainstorm potential corresponding directories or files. # 3. Verify your mappings by double-checking the file tree. # <scratchpad> # [Your thought process here] # </scratchpad> # just adding some clear separation between the prompts # ************************************************************ # ************************************************************ SYSTEM_THIRD_PROMPT = """ You are a principal software engineer tasked with creating a system design diagram using Mermaid.js based on a detailed explanation. Your goal is to accurately represent the architecture and design of the project as described in the explanation. The detailed explanation of the design will be enclosed in <explanation> tags in the users message. 
Also, sourced from the explanation, as a bonus, a few of the identified components have been mapped to their paths in the project file tree, whether it is a directory or file which will be enclosed in <component_mapping> tags in the users message. To create the Mermaid.js diagram: 1. Carefully read and analyze the provided design explanation. 2. Identify the main components, services, and their relationships within the system. 3. Determine the appropriate Mermaid.js diagram type to use (e.g., flowchart, sequence diagram, class diagram, architecture, etc.) based on the nature of the system described. 4. Create the Mermaid.js code to represent the design, ensuring that: a. All major components are included b. Relationships between components are clearly shown c. The diagram accurately reflects the architecture described in the explanation d. The layout is logical and easy to understand Guidelines for diagram components and relationships: - Use appropriate shapes for different types of components (e.g., rectangles for services, cylinders for databases, etc.) - Use clear and concise labels for each component - Show the direction of data flow or dependencies using arrows - Group related components together if applicable - Include any important notes or annotations mentioned in the explanation - Just follow the explanation. It will have everything you need. IMPORTANT!!: Please orient and draw the diagram as vertically as possible. You must avoid long horizontal lists of nodes and sections! You must include click events for components of the diagram that have been specified in the provided <component_mapping>: - Do not try to include the full url. This will be processed by another program afterwards. All you need to do is include the path. 
- For example: - This is a correct click event: `click Example "app/example.js"` - This is an incorrect click event: `click Example "https://github.com/username/repo/blob/main/app/example.js"` - Do this for as many components as specified in the component mapping, include directories and files. - If you believe the component contains files and is a directory, include the directory path. - If you believe the component references a specific file, include the file path. - Make sure to include the full path to the directory or file exactly as specified in the component mapping. - It is very important that you do this for as many files as possible. The more the better. - IMPORTANT: THESE PATHS ARE FOR CLICK EVENTS ONLY, these paths should not be included in the diagram's node's names. Only for the click events. Paths should not be seen by the user. Your output should be valid Mermaid.js code that can be rendered into a diagram. Do not include an init declaration such as `%%{init: {'key':'etc'}}%%`. This is handled externally. Just return the diagram code. Your response must strictly be just the Mermaid.js code, without any additional text or explanations. No code fence or markdown ticks needed, simply return the Mermaid.js code. Ensure that your diagram adheres strictly to the given explanation, without adding or omitting any significant components or relationships. For general direction, the provided example below is how you should structure your code: ```mermaid flowchart TD %% or graph TD, your choice %% Global entities A("Entity A"):::external %% more... %% Subgraphs and modules subgraph "Layer A" A1("Module A"):::example %% more modules... %% inner subgraphs if needed... end %% more subgraphs, modules, etc... %% Connections A -->|"relationship"| B %% and a lot more... %% Click Events click A1 "example/example.js" %% and a lot more... %% Styles classDef frontend %%... %% and a lot more... ``` EXTREMELY Important notes on syntax!!! 
(PAY ATTENTION TO THIS): - Make sure to add colour to the diagram!!! This is extremely critical. - In Mermaid.js syntax, we cannot include special characters for nodes without being inside quotes! For example: `EX[/api/process (Backend)]:::api` and `API -->|calls Process()| Backend` are two examples of syntax errors. They should be `EX["/api/process (Backend)"]:::api` and `API -->|"calls Process()"| Backend` respectively. Notice the quotes. This is extremely important. Make sure to include quotes for any string that contains special characters. - In Mermaid.js syntax, you cannot apply a class style directly within a subgraph declaration. For example: `subgraph "Frontend Layer":::frontend` is a syntax error. However, you can apply them to nodes within the subgraph. For example: `Example["Example Node"]:::frontend` is valid, and `class Example1,Example2 frontend` is valid. - In Mermaid.js syntax, there cannot be spaces in the relationship label names. For example: `A -->| "example relationship" | B` is a syntax error. It should be `A -->|"example relationship"| B` - In Mermaid.js syntax, you cannot give subgraphs an alias like nodes. For example: `subgraph A "Layer A"` is a syntax error. It should be `subgraph "Layer A"` """ # ^^^ note: I've generated a few diagrams now and claude still writes incorrect mermaid code sometimes. in the future, refer to those generated diagrams and add important instructions to the prompt above to avoid those mistakes. examples are best. # e. A legend is included # ^ removed since it was making the diagrams very long SYSTEM_FIX_MERMAID_PROMPT = """ You are a Mermaid syntax repair specialist. You will receive: - ... - ... - ... - ... Task: - Fix Mermaid syntax errors while preserving the original diagram meaning. - Keep all click events that map to repository paths. - Keep diagram mostly vertical. - Return Mermaid code only. Rules: - No markdown code fences. - No extra commentary. - Ensure final output is syntactically valid Mermaid. 
""" ================================================ FILE: backend/app/routers/generate.py ================================================ from __future__ import annotations import asyncio import json import re from typing import Any from fastapi import APIRouter, Request from fastapi.responses import JSONResponse, StreamingResponse from pydantic import BaseModel, Field, ValidationError from app.core.observability import Timer, log_event from app.prompts import ( SYSTEM_FIRST_PROMPT, SYSTEM_FIX_MERMAID_PROMPT, SYSTEM_SECOND_PROMPT, SYSTEM_THIRD_PROMPT, ) from app.services.github_service import GitHubService from app.services.mermaid_service import format_validation_feedback, validate_mermaid_syntax from app.services.model_config import get_model from app.services.openai_service import OpenAIService from app.services.pricing import estimate_text_token_cost_usd router = APIRouter(prefix="/generate", tags=["OpenAI"]) openai_service = OpenAIService() MAX_MERMAID_FIX_ATTEMPTS = 3 MULTI_STAGE_INPUT_MULTIPLIER = 2 INPUT_OVERHEAD_TOKENS = 3000 ESTIMATED_OUTPUT_TOKENS = 8000 class GenerateRequest(BaseModel): username: str = Field(min_length=1) repo: str = Field(min_length=1) api_key: str | None = Field(default=None, min_length=1) github_pat: str | None = Field(default=None, min_length=1) def _sse_message(payload: dict[str, Any]) -> str: return f"data: {json.dumps(payload)}\n\n" def _strip_mermaid_code_fences(text: str) -> str: return text.replace("```mermaid", "").replace("```", "").strip() def _extract_component_mapping(response: str) -> str: start_tag = "<component_mapping>" end_tag = "</component_mapping>" start_index = response.find(start_tag) end_index = response.find(end_tag) if start_index == -1 or end_index == -1: return response return response[start_index:end_index] def process_click_events(diagram: str, username: str, repo: str, branch: str) -> str: click_pattern = r'click ([^\s"]+)\s+"([^"]+)"' def replace_path(match: re.Match[str]) -> str: node_id = match.group(1) trimmed_path = 
match.group(2).strip().strip("\"'") is_file = "." in trimmed_path and not trimmed_path.endswith("/") path_type = "blob" if is_file else "tree" full_url = f"https://github.com/{username}/{repo}/{path_type}/{branch}/{trimmed_path}" return f'click {node_id} "{full_url}"' return re.sub(click_pattern, replace_path, diagram) def _parse_request_payload(payload: Any) -> tuple[GenerateRequest | None, str | None]: try: parsed = GenerateRequest.model_validate(payload) return parsed, None except ValidationError: return None, "Invalid request payload." def _get_github_data(username: str, repo: str, github_pat: str | None): github_service = GitHubService(pat=github_pat) return github_service.get_github_data(username, repo) async def _estimate_repo_input_tokens( model: str, file_tree: str, readme: str, api_key: str | None = None, ) -> int: try: return await openai_service.count_input_tokens( model=model, system_prompt=SYSTEM_FIRST_PROMPT, data={ "file_tree": file_tree, "readme": readme, }, api_key=api_key, reasoning_effort="medium", ) except Exception: return openai_service.estimate_tokens(f"{file_tree}\n{readme}") @router.post("/cost") async def get_generation_cost(request: Request): timer = Timer() try: payload = await request.json() parsed, error = _parse_request_payload(payload) if not parsed: return JSONResponse( { "ok": False, "error": error, "error_code": "VALIDATION_ERROR", } ) github_data = _get_github_data(parsed.username, parsed.repo, parsed.github_pat) model = get_model() base_input_tokens = await _estimate_repo_input_tokens( model=model, file_tree=github_data.file_tree, readme=github_data.readme, api_key=parsed.api_key, ) estimated_input_tokens = ( base_input_tokens * MULTI_STAGE_INPUT_MULTIPLIER + INPUT_OVERHEAD_TOKENS ) estimated_output_tokens = ESTIMATED_OUTPUT_TOKENS cost_usd, pricing_model, pricing = estimate_text_token_cost_usd( model=model, input_tokens=estimated_input_tokens, output_tokens=estimated_output_tokens, ) response_payload = { "ok": True, "cost": 
f"${cost_usd:.2f} USD", "model": model, "pricing_model": pricing_model, "estimated_input_tokens": estimated_input_tokens, "estimated_output_tokens": estimated_output_tokens, "pricing": { "input_per_million_usd": pricing.input_per_million_usd, "output_per_million_usd": pricing.output_per_million_usd, }, } log_event( "generate.cost.success", username=parsed.username, repo=parsed.repo, elapsed_ms=timer.elapsed_ms(), model=model, ) return JSONResponse(response_payload) except Exception as exc: log_event( "generate.cost.failed", elapsed_ms=timer.elapsed_ms(), error=str(exc), ) return JSONResponse( { "ok": False, "error": str(exc) if isinstance(exc, Exception) else "Failed to estimate generation cost.", "error_code": "COST_ESTIMATION_FAILED", } ) @router.post("/stream") async def generate_stream(request: Request): try: payload = await request.json() except Exception: return JSONResponse( { "ok": False, "error": "Invalid request payload.", "error_code": "VALIDATION_ERROR", }, status_code=400, ) parsed, error = _parse_request_payload(payload) if not parsed: return JSONResponse( { "ok": False, "error": error, "error_code": "VALIDATION_ERROR", }, status_code=400, ) async def event_generator(): timer = Timer() def send(payload: dict[str, Any]) -> str: return _sse_message(payload) try: github_data = _get_github_data(parsed.username, parsed.repo, parsed.github_pat) model = get_model() token_count = await _estimate_repo_input_tokens( model=model, file_tree=github_data.file_tree, readme=github_data.readme, api_key=parsed.api_key, ) yield send( { "status": "started", "message": "Starting generation process...", } ) if token_count > 50000 and token_count < 195000 and not parsed.api_key: yield send( { "status": "error", "error": "File tree and README combined exceeds token limit (50,000). This repository is too large for free generation. 
Provide your own OpenAI API key to continue.", "error_code": "API_KEY_REQUIRED", } ) return if token_count > 195000: yield send( { "status": "error", "error": "Repository is too large (>195k tokens) for analysis. Try a smaller repo.", "error_code": "TOKEN_LIMIT_EXCEEDED", } ) return yield send( { "status": "explanation_sent", "message": f"Sending explanation request to {model}...", } ) await asyncio.sleep(0.08) yield send( { "status": "explanation", "message": "Analyzing repository structure...", } ) explanation = "" async for chunk in openai_service.stream_completion( model=model, system_prompt=SYSTEM_FIRST_PROMPT, data={ "file_tree": github_data.file_tree, "readme": github_data.readme, }, api_key=parsed.api_key, reasoning_effort="medium", ): explanation += chunk yield send({"status": "explanation_chunk", "chunk": chunk}) yield send( { "status": "mapping_sent", "message": f"Sending component mapping request to {model}...", } ) await asyncio.sleep(0.08) yield send( { "status": "mapping", "message": "Creating component mapping...", } ) full_mapping_response = "" async for chunk in openai_service.stream_completion( model=model, system_prompt=SYSTEM_SECOND_PROMPT, data={ "explanation": explanation, "file_tree": github_data.file_tree, }, api_key=parsed.api_key, reasoning_effort="low", ): full_mapping_response += chunk yield send({"status": "mapping_chunk", "chunk": chunk}) component_mapping = _extract_component_mapping(full_mapping_response) yield send( { "status": "diagram_sent", "message": f"Sending diagram generation request to {model}...", } ) await asyncio.sleep(0.08) yield send( { "status": "diagram", "message": "Generating diagram...", } ) mermaid_code = "" async for chunk in openai_service.stream_completion( model=model, system_prompt=SYSTEM_THIRD_PROMPT, data={ "explanation": explanation, "component_mapping": component_mapping, }, api_key=parsed.api_key, reasoning_effort="low", ): mermaid_code += chunk yield send({"status": "diagram_chunk", "chunk": chunk}) 
candidate_diagram = _strip_mermaid_code_fences(mermaid_code) validation_result = await asyncio.to_thread( validate_mermaid_syntax, candidate_diagram, ) had_fix_loop = not validation_result.valid if not validation_result.valid: parser_feedback = format_validation_feedback(validation_result) yield send( { "status": "diagram_fixing", "message": "Diagram generated. Mermaid syntax validation failed, starting auto-fix loop...", "parser_error": parser_feedback, } ) attempt = 1 while (not validation_result.valid) and attempt <= MAX_MERMAID_FIX_ATTEMPTS: parser_feedback = format_validation_feedback(validation_result) yield send( { "status": "diagram_fix_attempt", "message": f"Fixing Mermaid syntax (attempt {attempt}/{MAX_MERMAID_FIX_ATTEMPTS})...", "fix_attempt": attempt, "fix_max_attempts": MAX_MERMAID_FIX_ATTEMPTS, "parser_error": parser_feedback, } ) repaired_diagram = "" async for chunk in openai_service.stream_completion( model=model, system_prompt=SYSTEM_FIX_MERMAID_PROMPT, data={ "mermaid_code": candidate_diagram, "parser_error": parser_feedback, "explanation": explanation, "component_mapping": component_mapping, }, api_key=parsed.api_key, reasoning_effort="low", ): repaired_diagram += chunk yield send( { "status": "diagram_fix_chunk", "chunk": chunk, "fix_attempt": attempt, "fix_max_attempts": MAX_MERMAID_FIX_ATTEMPTS, } ) candidate_diagram = _strip_mermaid_code_fences(repaired_diagram) yield send( { "status": "diagram_fix_validating", "message": f"Validating Mermaid syntax after attempt {attempt}/{MAX_MERMAID_FIX_ATTEMPTS}...", "fix_attempt": attempt, "fix_max_attempts": MAX_MERMAID_FIX_ATTEMPTS, } ) validation_result = await asyncio.to_thread( validate_mermaid_syntax, candidate_diagram, ) attempt += 1 if not validation_result.valid: yield send( { "status": "error", "error": "Generated Mermaid remained syntactically invalid after auto-fix attempts. 
Please retry generation.", "error_code": "MERMAID_SYNTAX_UNRESOLVED", "parser_error": format_validation_feedback(validation_result), } ) return processed_diagram = process_click_events( candidate_diagram, parsed.username, parsed.repo, github_data.default_branch, ) if had_fix_loop: yield send( { "status": "diagram_fixing", "message": "Mermaid syntax validated. Finalizing diagram output...", } ) yield send( { "status": "complete", "diagram": processed_diagram, "explanation": explanation, "mapping": component_mapping, } ) log_event( "generate.stream.success", username=parsed.username, repo=parsed.repo, elapsed_ms=timer.elapsed_ms(), model=model, ) except Exception as exc: yield send( { "status": "error", "error": str(exc) if isinstance(exc, Exception) else "Streaming generation failed.", "error_code": "STREAM_FAILED", } ) log_event( "generate.stream.failed", username=parsed.username, repo=parsed.repo, elapsed_ms=timer.elapsed_ms(), error=str(exc), ) return StreamingResponse( event_generator(), media_type="text/event-stream", headers={ "Content-Type": "text/event-stream; charset=utf-8", "Cache-Control": "no-cache, no-transform", "Connection": "keep-alive", "X-Accel-Buffering": "no", }, ) ================================================ FILE: backend/app/services/github_service.py ================================================ from __future__ import annotations import base64 import os from datetime import UTC, datetime, timedelta from dataclasses import dataclass import jwt import requests EXCLUDED_PATTERNS = [ "node_modules/", "vendor/", "venv/", ".min.", ".pyc", ".pyo", ".pyd", ".so", ".dll", ".class", ".jpg", ".jpeg", ".png", ".gif", ".ico", ".svg", ".ttf", ".woff", ".webp", "__pycache__/", ".cache/", ".tmp/", "yarn.lock", "poetry.lock", "*.log", ".vscode/", ".idea/", ] @dataclass(frozen=True) class GithubData: default_branch: str file_tree: str readme: str def _should_include_file(path: str) -> bool: lower_path = path.lower() return not any(pattern in lower_path 
for pattern in EXCLUDED_PATTERNS) def _fetch_json(url: str, headers: dict[str, str], not_found_message: str) -> dict: response = requests.get(url, headers=headers, timeout=30) if response.status_code == 404: raise ValueError(not_found_message) if not response.ok: raise ValueError(f"GitHub request failed ({response.status_code}): {response.text}") return response.json() class GitHubService: def __init__(self, pat: str | None = None): # Request-provided PAT (or env PAT) has top priority. self.github_token = (pat or os.getenv("GITHUB_PAT") or "").strip() or None # GitHub App credentials are used when PAT is unavailable. self.client_id = (os.getenv("GITHUB_CLIENT_ID") or "").strip() or None self.private_key = (os.getenv("GITHUB_PRIVATE_KEY") or "").strip() or None self.installation_id = (os.getenv("GITHUB_INSTALLATION_ID") or "").strip() or None self.access_token: str | None = None self.token_expires_at: datetime | None = None def _normalize_private_key(self) -> str: if not self.private_key: raise ValueError("Missing GITHUB_PRIVATE_KEY.") # Supports both literal newlines and escaped \\n forms. 
return self.private_key.replace("\\n", "\n") def _can_use_app_auth(self) -> bool: return bool(self.client_id and self.private_key and self.installation_id) def _generate_jwt(self) -> str: if not self.client_id: raise ValueError("Missing GITHUB_CLIENT_ID.") now = int(datetime.now(UTC).timestamp()) payload = { "iat": now, "exp": now + (10 * 60), "iss": self.client_id, } return jwt.encode(payload, self._normalize_private_key(), algorithm="RS256") def _get_installation_token(self) -> str: if self.access_token and self.token_expires_at and self.token_expires_at > datetime.now(UTC): return self.access_token if not self.installation_id: raise ValueError("Missing GITHUB_INSTALLATION_ID.") jwt_token = self._generate_jwt() response = requests.post( f"https://api.github.com/app/installations/{self.installation_id}/access_tokens", headers={ "Authorization": f"Bearer {jwt_token}", "Accept": "application/vnd.github+json", "X-GitHub-Api-Version": "2022-11-28", }, timeout=30, ) if not response.ok: raise ValueError( f"GitHub app token request failed ({response.status_code}): {response.text}" ) payload = response.json() token = payload.get("token") if not isinstance(token, str) or not token: raise ValueError("GitHub app token response missing token.") expires_at_raw = payload.get("expires_at") if isinstance(expires_at_raw, str): try: expires_at = datetime.fromisoformat(expires_at_raw.replace("Z", "+00:00")) except ValueError: expires_at = datetime.now(UTC) + timedelta(minutes=50) else: expires_at = datetime.now(UTC) + timedelta(minutes=50) self.access_token = token self.token_expires_at = expires_at return token def _get_headers(self) -> dict[str, str]: if self.github_token: return { "Authorization": f"token {self.github_token}", "Accept": "application/vnd.github+json", } if self._can_use_app_auth(): token = self._get_installation_token() return { "Authorization": f"Bearer {token}", "Accept": "application/vnd.github+json", "X-GitHub-Api-Version": "2022-11-28", } return {"Accept": 
"application/vnd.github+json"} def get_default_branch(self, username: str, repo: str) -> str: data = _fetch_json( f"https://api.github.com/repos/{username}/{repo}", self._get_headers(), "Repository not found.", ) return data.get("default_branch") or "main" def get_github_file_paths_as_list(self, username: str, repo: str, branch: str) -> str: data = _fetch_json( f"https://api.github.com/repos/{username}/{repo}/git/trees/{branch}?recursive=1", self._get_headers(), "Could not fetch repository file tree.", ) paths = [ item.get("path") for item in (data.get("tree") or []) if isinstance(item.get("path"), str) and _should_include_file(item["path"]) ] if not paths: raise ValueError( "Could not fetch repository file tree. Repository might be empty or inaccessible." ) return "\n".join(paths) def get_github_readme(self, username: str, repo: str) -> str: data = _fetch_json( f"https://api.github.com/repos/{username}/{repo}/readme", self._get_headers(), "No README found for the specified repository.", ) content = data.get("content") if not isinstance(content, str) or not content: raise ValueError("No README found for the specified repository.") encoding = data.get("encoding") if encoding == "base64": return base64.b64decode(content).decode("utf-8") return content def get_github_data(self, username: str, repo: str) -> GithubData: default_branch = self.get_default_branch(username, repo) file_tree = self.get_github_file_paths_as_list(username, repo, default_branch) readme = self.get_github_readme(username, repo) return GithubData( default_branch=default_branch, file_tree=file_tree, readme=readme, ) ================================================ FILE: backend/app/services/mermaid_service.py ================================================ from __future__ import annotations import json import subprocess from dataclasses import dataclass @dataclass(frozen=True) class MermaidValidationResult: valid: bool message: str | None = None line: int | None = None token: str | None = None 
def validate_mermaid_syntax(
    diagram: str,
    timeout_seconds: float = 60.0,
) -> "MermaidValidationResult":
    """Validate Mermaid source by piping it to the Node validator script.

    Runs ``node scripts/validate_mermaid.mjs`` (path relative to the process
    working directory) with ``diagram`` on stdin and parses the JSON object the
    script prints on stdout.

    Args:
        diagram: Mermaid source to check.
        timeout_seconds: Upper bound on the validator's run time, so a hung
            Node process cannot block the calling worker thread forever.

    Returns:
        A ``MermaidValidationResult``. Every failure mode (spawn error,
        timeout, non-zero exit, unparsable output) is reported as
        ``valid=False`` with a message; this function does not raise for them.
    """
    try:
        proc = subprocess.run(
            ["node", "scripts/validate_mermaid.mjs"],
            input=diagram,
            text=True,
            capture_output=True,
            check=False,
            timeout=timeout_seconds,
        )
    except subprocess.TimeoutExpired:
        return MermaidValidationResult(
            valid=False,
            message=normalize_parser_message(
                f"Mermaid validation timed out after {timeout_seconds:g} seconds."
            ),
        )
    except Exception as exc:
        return MermaidValidationResult(
            valid=False,
            message=normalize_parser_message(str(exc)),
        )

    if proc.returncode != 0:
        message = proc.stderr.strip() or proc.stdout.strip() or "Mermaid validation failed."
        return MermaidValidationResult(valid=False, message=normalize_parser_message(message))

    try:
        payload = json.loads(proc.stdout)
    except json.JSONDecodeError:
        payload = None

    # The script is expected to print a JSON object; anything else (including
    # valid JSON of another shape) is treated as unparsable output.
    if not isinstance(payload, dict):
        return MermaidValidationResult(
            valid=False,
            message=normalize_parser_message("Mermaid validator returned invalid JSON."),
        )

    valid = bool(payload.get("valid"))
    message = payload.get("message")
    if not isinstance(message, str):
        message = None
    normalized_message = message if valid else normalize_parser_message(message)

    # Only pass through detail fields of the declared types so downstream
    # formatting (e.g. joining ``expected``) cannot fail on odd payloads.
    line = payload.get("line")
    token = payload.get("token")
    expected = payload.get("expected")

    return MermaidValidationResult(
        valid=valid,
        message=normalized_message,
        line=line if isinstance(line, int) and not isinstance(line, bool) else None,
        token=token if isinstance(token, str) else None,
        expected=[str(item) for item in expected] if isinstance(expected, list) else None,
    )
details = [f"message: {result.message or 'unknown parse error'}"] if isinstance(result.line, int): details.append(f"line: {result.line}") if result.token: details.append(f"token: {result.token}") if result.expected: details.append(f"expected: {', '.join(result.expected)}") return "\n".join(details) ================================================ FILE: backend/app/services/model_config.py ================================================ from __future__ import annotations import os DEFAULT_MODEL = "gpt-5.4-mini" def get_model() -> str: model = os.getenv("OPENAI_MODEL", "").strip() return model or DEFAULT_MODEL ================================================ FILE: backend/app/services/openai_service.py ================================================ from __future__ import annotations from typing import AsyncGenerator, Literal import math import os from dotenv import load_dotenv from openai import AsyncOpenAI from app.utils.format_message import format_user_message load_dotenv() ReasoningEffort = Literal["low", "medium", "high"] class OpenAIService: def __init__(self): self.default_api_key = os.getenv("OPENAI_API_KEY") def _resolve_api_key(self, override_api_key: str | None = None) -> str: api_key = (override_api_key or self.default_api_key or "").strip() if not api_key: raise ValueError( "Missing OpenAI API key. Set OPENAI_API_KEY or provide api_key in request." ) return api_key @staticmethod def estimate_tokens(text: str) -> int: # Mirrors Next.js fallback heuristic. return math.ceil(len(text) / 4) @staticmethod def _build_input(system_prompt: str, user_prompt: str) -> list[dict]: return [ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}, ] @staticmethod def _create_client(api_key: str) -> AsyncOpenAI: # Keep explicit config local to this service. 
return AsyncOpenAI( api_key=api_key, max_retries=2, timeout=600, ) async def stream_completion( self, *, model: str, system_prompt: str, data: dict[str, str | None], api_key: str | None = None, reasoning_effort: ReasoningEffort | None = None, max_output_tokens: int | None = None, ) -> AsyncGenerator[str, None]: user_prompt = format_user_message(data) resolved_api_key = self._resolve_api_key(api_key) payload: dict = { "model": model, "stream": True, "input": self._build_input(system_prompt, user_prompt), } if reasoning_effort: payload["reasoning"] = {"effort": reasoning_effort} if max_output_tokens: payload["max_output_tokens"] = max_output_tokens client = self._create_client(resolved_api_key) stream = await client.responses.create(**payload) try: async for event in stream: if event.type == "response.output_text.delta": delta = getattr(event, "delta", None) if isinstance(delta, str) and delta: yield delta continue if event.type == "error": message = getattr(event, "message", None) or "OpenAI stream failed." 
raise ValueError(str(message)) finally: await stream.close() await client.close() async def count_input_tokens( self, *, model: str, system_prompt: str, data: dict[str, str | None], api_key: str | None = None, reasoning_effort: ReasoningEffort | None = None, ) -> int: user_prompt = format_user_message(data) resolved_api_key = self._resolve_api_key(api_key) payload: dict = { "model": model, "input": self._build_input(system_prompt, user_prompt), } if reasoning_effort: payload["reasoning"] = {"effort": reasoning_effort} client = self._create_client(resolved_api_key) try: response = await client.responses.input_tokens.count(**payload) input_tokens = getattr(response, "input_tokens", None) if not isinstance(input_tokens, int): raise ValueError("OpenAI input token count returned invalid payload.") return input_tokens finally: await client.close() ================================================ FILE: backend/app/services/pricing.py ================================================ from __future__ import annotations from dataclasses import dataclass DEFAULT_PRICING_MODEL = "gpt-5.4-mini" @dataclass(frozen=True) class ModelPricing: input_per_million_usd: float output_per_million_usd: float MODEL_PRICING: dict[str, ModelPricing] = { "gpt-5.4": ModelPricing(input_per_million_usd=2.5, output_per_million_usd=15.0), "gpt-5.4-pro": ModelPricing(input_per_million_usd=30.0, output_per_million_usd=180.0), "gpt-5.4-mini": ModelPricing(input_per_million_usd=0.75, output_per_million_usd=4.5), "gpt-5.4-nano": ModelPricing(input_per_million_usd=0.2, output_per_million_usd=1.25), "gpt-5.2": ModelPricing(input_per_million_usd=1.75, output_per_million_usd=14.0), "gpt-5.2-chat-latest": ModelPricing( input_per_million_usd=1.75, output_per_million_usd=14.0, ), "gpt-5.2-codex": ModelPricing(input_per_million_usd=1.75, output_per_million_usd=14.0), "gpt-5.2-pro": ModelPricing(input_per_million_usd=21.0, output_per_million_usd=168.0), "gpt-5.1": ModelPricing(input_per_million_usd=1.25, 
def resolve_pricing_model(model: str) -> str:
    """Map an arbitrary model name onto a key of ``MODEL_PRICING``.

    Resolution order: exact match on the trimmed, lower-cased name; exact match
    after dropping a trailing ``-YYYY-MM-DD`` snapshot suffix; first matching
    family prefix; otherwise ``DEFAULT_PRICING_MODEL``.
    """
    # Order matters: more specific prefixes must come before the family they
    # belong to (e.g. "gpt-5.4-pro" before "gpt-5.4", "gpt-5-mini" before "gpt-5").
    family_prefixes = (
        ("gpt-5.4-pro", "gpt-5.4-pro"),
        ("gpt-5.4-mini", "gpt-5.4-mini"),
        ("gpt-5.4-nano", "gpt-5.4-nano"),
        ("gpt-5.4", "gpt-5.4"),
        ("gpt-5.2-pro", "gpt-5.2-pro"),
        ("gpt-5.2-codex", "gpt-5.2-codex"),
        ("gpt-5.2-chat", "gpt-5.2-chat-latest"),
        ("gpt-5.2", "gpt-5.2"),
        ("gpt-5.1", "gpt-5.1"),
        ("gpt-5-mini", "gpt-5-mini"),
        ("gpt-5-nano", "gpt-5-nano"),
        ("gpt-5", "gpt-5"),
        ("o4-mini", "o4-mini"),
    )

    candidate = model.strip().lower()
    if candidate in MODEL_PRICING:
        return candidate

    undated = _strip_date_snapshot_suffix(candidate)
    if undated in MODEL_PRICING:
        return undated

    for prefix, pricing_key in family_prefixes:
        if undated.startswith(prefix):
            return pricing_key

    return DEFAULT_PRICING_MODEL
pricing.output_per_million_usd return (input_cost + output_cost, pricing_model, pricing) ================================================ FILE: backend/app/utils/format_message.py ================================================ def format_user_message(data: dict[str, str | None]) -> str: parts: list[str] = [] for key, value in data.items(): if isinstance(value, str): parts.append(f"<{key}>\n{value}\n") return "\n".join(parts) ================================================ FILE: backend/deploy.sh ================================================ #!/bin/bash # Exit on any error set -e # Navigate to project directory cd ~/gitdiagram # Pull latest changes git pull --ff-only origin main # Build and restart containers with production environment docker-compose down ENVIRONMENT=production docker-compose up --build -d # Remove unused images docker image prune -f # Show logs only if --logs flag is passed if [ "$1" == "--logs" ]; then docker-compose logs -f else echo "Deployment complete! Run 'docker-compose logs -f' to view logs" fi ================================================ FILE: backend/entrypoint.sh ================================================ #!/bin/bash set -euo pipefail ENVIRONMENT="${ENVIRONMENT:-production}" HOST="${HOST:-0.0.0.0}" PORT="${PORT:-8000}" WEB_CONCURRENCY="${WEB_CONCURRENCY:-2}" echo "Current ENVIRONMENT: ${ENVIRONMENT}" echo "Binding to ${HOST}:${PORT}" if [ "${ENVIRONMENT}" = "development" ]; then echo "Starting in development mode with hot reload..." exec uv run --no-dev uvicorn app.main:app --host "${HOST}" --port "${PORT}" --reload fi echo "Starting in production mode..." 
exec uv run --no-dev uvicorn app.main:app \ --host "${HOST}" \ --port "${PORT}" \ --timeout-keep-alive 300 \ --workers "${WEB_CONCURRENCY}" \ --loop uvloop \ --http httptools ================================================ FILE: backend/nginx/api.conf ================================================ server { server_name api.gitdiagram.com; # Block requests with no valid Host header if ($host !~ ^(api.gitdiagram.com)$) { return 444; } # Strictly allow only GET, POST, and OPTIONS requests for the specified paths (defined in my fastapi app) location ~ ^/(generate(/cost|/stream)?|healthz|)?$ { if ($request_method !~ ^(GET|POST|OPTIONS)$) { return 444; } proxy_pass http://127.0.0.1:8000; include proxy_params; proxy_redirect off; # Disable buffering for SSE proxy_buffering off; proxy_cache off; # Required headers for SSE proxy_set_header Connection ''; proxy_http_version 1.1; } # Return 444 for everything else (no response, just close connection) location / { return 444; # keep access log on } # Add timeout settings proxy_connect_timeout 300; proxy_send_timeout 300; proxy_read_timeout 300; send_timeout 300; listen 443 ssl; # managed by Certbot ssl_certificate /etc/letsencrypt/live/api.gitdiagram.com/fullchain.pem; # managed by Certbot ssl_certificate_key /etc/letsencrypt/live/api.gitdiagram.com/privkey.pem; # managed by Certbot include /etc/letsencrypt/options-ssl-nginx.conf; # managed by Certbot ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; # managed by Certbot } server { if ($host = api.gitdiagram.com) { return 301 https://$host$request_uri; } # managed by Certbot listen 80; server_name api.gitdiagram.com; return 404; # managed by Certbot } ================================================ FILE: backend/nginx/setup_nginx.sh ================================================ #!/bin/bash # Exit on any error set -e # Check if running as root if [ "$EUID" -ne 0 ]; then echo "Please run as root or with sudo" exit 1 fi # Copy Nginx configuration echo "Copying Nginx 
configuration..." cp "$(dirname "$0")/api.conf" /etc/nginx/sites-available/api ln -sf /etc/nginx/sites-available/api /etc/nginx/sites-enabled/ # Test Nginx configuration echo "Testing Nginx configuration..." nginx -t # Reload Nginx echo "Reloading Nginx..." systemctl reload nginx echo "Nginx configuration updated successfully!" ================================================ FILE: backend/package.json ================================================ { "name": "gitdiagram-backend-mermaid-validator", "private": true, "type": "module", "dependencies": { "dompurify": "3.3.1", "jsdom": "28.1.0", "mermaid": "11.12.3" } } ================================================ FILE: backend/pyproject.toml ================================================ [project] name = "gitdiagram-backend" version = "0.1.0" description = "FastAPI backend for GitDiagram" requires-python = ">=3.12,<3.13" dependencies = [ "aiohttp==3.13.3", "api-analytics==1.2.7", "fastapi==0.128.8", "openai==2.21.0", "PyJWT[crypto]==2.11.0", "python-dotenv==1.2.1", "requests==2.32.5", "tiktoken==0.12.0", "uvicorn[standard]==0.40.0", ] [dependency-groups] dev = [ "httpx==0.28.1", "pytest==8.3.4", ] [build-system] requires = ["hatchling"] build-backend = "hatchling.build" [tool.uv] package = false ================================================ FILE: backend/scripts/validate_mermaid.mjs ================================================ import { createRequire } from "node:module"; import { stdin, stdout, stderr } from "node:process"; import DOMPurify from "dompurify"; const require = createRequire(import.meta.url); let mermaidInstance = null; let initialized = false; let domPurifyPatched = false; function ensureDomPurifyPatched() { if (domPurifyPatched) return; try { const domPurify = DOMPurify; if (typeof domPurify === "function" && typeof domPurify.sanitize !== "function") { const { JSDOM } = require("jsdom"); const domWindow = new JSDOM("").window; const domPurifyInstance = domPurify(domWindow); 
Object.assign(domPurify, domPurifyInstance); } } catch { // Best effort patch. } finally { domPurifyPatched = true; } } async function getMermaid() { if (mermaidInstance) return mermaidInstance; ensureDomPurifyPatched(); const mermaidModule = await import("mermaid"); mermaidInstance = mermaidModule.default; return mermaidInstance; } async function ensureMermaidInitialized() { const mermaid = await getMermaid(); if (initialized) return mermaid; mermaid.initialize({ startOnLoad: false, securityLevel: "loose", }); initialized = true; return mermaid; } async function readStdin() { let data = ""; for await (const chunk of stdin) { data += chunk; } return data; } function normalizeError(error) { return { valid: false, message: error?.message || "Mermaid syntax is invalid and could not be parsed.", line: error?.hash?.line, token: error?.hash?.token, expected: error?.hash?.expected, }; } async function main() { try { const diagram = (await readStdin()).toString(); const mermaid = await ensureMermaidInitialized(); await mermaid.parse(diagram); stdout.write(JSON.stringify({ valid: true })); } catch (error) { stdout.write(JSON.stringify(normalizeError(error))); } } main().catch((error) => { stderr.write(String(error?.message || error)); process.exit(1); }); ================================================ FILE: backend/tests/conftest.py ================================================ from pathlib import Path import sys BACKEND_ROOT = Path(__file__).resolve().parents[1] if str(BACKEND_ROOT) not in sys.path: sys.path.insert(0, str(BACKEND_ROOT)) ================================================ FILE: backend/tests/test_generate_router.py ================================================ import json from types import SimpleNamespace from fastapi.testclient import TestClient from app.main import app from app.routers import generate from app.services.mermaid_service import MermaidValidationResult client = TestClient(app) def test_healthz_ok(): response = client.get("/healthz") 
def test_generate_stream_event_order_with_fix_loop(monkeypatch):
    """Exercise POST /generate/stream when the first diagram fails validation.

    GitHub access, model selection, token estimation, the OpenAI stream and
    Mermaid validation are all replaced with fakes. The fake validator rejects
    the first diagram and accepts the second, so the router has to run one
    repair attempt. The test then checks that the expected status events were
    emitted, that "complete" is the last one, and that the final diagram's
    click target was rewritten to a GitHub blob URL.
    """
    # Fake repository snapshot: one file and a trivial README on branch "main".
    monkeypatch.setattr(
        generate,
        "_get_github_data",
        lambda username, repo, github_pat=None: SimpleNamespace(
            default_branch="main",
            file_tree="src/main.py",
            readme="# readme",
        ),
    )
    monkeypatch.setattr(generate, "get_model", lambda: "gpt-5.4-mini")

    async def fake_estimate_repo_input_tokens(model, file_tree, readme, api_key=None):
        # Fixed token estimate returned in place of the real token counter.
        return 1000

    async def fake_stream_completion(*, model, system_prompt, data, api_key=None, reasoning_effort=None, max_output_tokens=None):
        # Dispatch on a distinctive phrase of each stage's system prompt.
        # The checks are ordered; the final yield is the fallthrough used for
        # the initial diagram-generation stage.
        if "explaining to a principal" in system_prompt:
            yield "Repo explanation"
            return
        if "mapping key components" in system_prompt:
            # NOTE(review): the empty chunks presumably stood for wrapper tags
            # around the mapping body — confirm against the repository copy.
            yield ""
            yield "1. API: src/main.py"
            yield ""
            return
        if "syntax repair specialist" in system_prompt:
            yield 'flowchart TD\nA["API"] --> B["Worker"]\nclick A "src/main.py"'
            return
        yield 'flowchart TD\nA["API"] --> B["Worker"]\nclick A "src/main.py"'

    # Consumed one result per validate call: first invalid, then valid.
    # A third call would raise StopIteration from next().
    validation_results = iter(
        [
            MermaidValidationResult(valid=False, message="bad syntax"),
            MermaidValidationResult(valid=True),
        ]
    )

    monkeypatch.setattr(generate, "_estimate_repo_input_tokens", fake_estimate_repo_input_tokens)
    monkeypatch.setattr(generate.openai_service, "stream_completion", fake_stream_completion)
    monkeypatch.setattr(generate, "validate_mermaid_syntax", lambda diagram: next(validation_results))

    response = client.post(
        "/generate/stream",
        json={"username": "acme", "repo": "demo"},
    )
    assert response.status_code == 200

    # Parse the SSE body: blocks are separated by a blank line and each data
    # block starts with the 6-character prefix "data: ".
    events = []
    payloads = []
    for block in response.text.split("\n\n"):
        if not block.startswith("data: "):
            continue
        payload = json.loads(block[6:])
        payloads.append(payload)
        if "status" in payload:
            events.append(payload["status"])

    # Membership checks only: relative order is not asserted, except that
    # "complete" must be the final status.
    assert "started" in events
    assert "explanation_sent" in events
    assert "mapping_sent" in events
    assert "diagram_sent" in events
    assert "diagram_fixing" in events
    assert "diagram_fix_attempt" in events
    assert "diagram_fix_validating" in events
    assert events[-1] == "complete"

    complete_payload = payloads[-1]
    assert complete_payload["status"] == "complete"
    # The relative click path must have been expanded to a full blob URL.
    assert "https://github.com/acme/demo/blob/main/src/main.py" in complete_payload["diagram"]
def test_resolve_pricing_model_keeps_gpt_5_4_mini_on_its_own_tier():
    """Both the bare model id and a dated snapshot id resolve to the same tier."""
    expected_tier = "gpt-5.4-mini"
    for model_id in ("gpt-5.4-mini", "gpt-5.4-mini-2026-03-17"):
        assert resolve_pricing_model(model_id) == expected_tier


def test_estimate_text_token_cost_uses_gpt_5_4_mini_pricing():
    """One million tokens each way is priced at the gpt-5.4-mini rates."""
    one_million = 1_000_000
    estimate = estimate_text_token_cost_usd(
        model="gpt-5.4-mini",
        input_tokens=one_million,
        output_tokens=one_million,
    )
    cost_usd, pricing_model, pricing = estimate

    assert pricing_model == "gpt-5.4-mini"
    assert pricing.input_per_million_usd == 0.75
    assert pricing.output_per_million_usd == 4.5
    # 0.75 + 4.5, both exactly representable as floats.
    assert cost_usd == 5.25
primarily through the FastAPI backend in `backend/` (Railway in production). Next.js Route Handlers under `/api/generate/*` remain available as an optional fallback path. ## 1) Install tool versions Recommended versions: - Node.js: `22.x` (see `.nvmrc`) - pnpm: `9.13.0` - Python: `3.12.x` (required for FastAPI backend work) - uv: `0.5.24+` (required for FastAPI backend work) - Docker: latest stable Install/check: ```bash node -v pnpm -v python3 --version uv --version docker --version ``` Expected: - Node starts with `v22` - pnpm prints `9.13.0` (or compatible in the same series) - Python starts with `3.12` ## 2) Install frontend dependencies ```bash pnpm install ``` ## 3) Sync backend dependencies with uv ```bash cd backend uv sync --no-install-project cd .. ``` This creates `backend/.venv` and installs pinned Python dependencies from `backend/uv.lock`. ## 4) Configure environment variables ```bash cp .env.example .env ``` Then set at least: - `POSTGRES_URL` - `OPENAI_API_KEY` Optional: - `OPENAI_MODEL` (single model used for all generation stages, defaults to `gpt-5.4-mini`) - `GITHUB_PAT` - `NEXT_PUBLIC_POSTHOG_KEY` - `NEXT_PUBLIC_USE_LEGACY_BACKEND=true` and `NEXT_PUBLIC_API_DEV_URL` (to route frontend calls to an external backend such as Railway/local FastAPI) ## 5) Start local services Start local Postgres (if using local DB URL): ```bash chmod +x start-database.sh ./start-database.sh ``` Push schema: ```bash pnpm db:push ``` Start frontend: ```bash pnpm dev ``` Start FastAPI backend (recommended for production parity): ```bash docker-compose up --build -d docker-compose logs -f api ``` or ```bash pnpm dev:backend ``` If the FastAPI backend is running locally at `http://localhost:8000`, set: - `NEXT_PUBLIC_USE_LEGACY_BACKEND=true` - `NEXT_PUBLIC_API_DEV_URL=http://localhost:8000` ## 6) Verification commands Run all baseline checks: ```bash pnpm check pnpm test pnpm build ``` FastAPI backend checks: ```bash cd backend uv run pytest -q uv run python -m 
compileall app cd .. ``` If all pass, your local environment is ready. ================================================ FILE: docs/railway-backend.md ================================================ # Railway Backend Deploy Guide This guide deploys the production FastAPI backend from this monorepo. ## 1) Prerequisites - Railway account + project access - Railway CLI installed - Logged in locally: ```bash railway login ``` ## 2) Create/link the Railway service You can use the dashboard or the CLI. CLI flow: ```bash cd /path/to/gitdiagram railway init -n gitdiagram railway add --service gitdiagram-api railway link --service gitdiagram-api ``` ## 3) Set backend environment variables Required: - `OPENAI_API_KEY` Recommended: - `OPENAI_MODEL=gpt-5.4-mini` - `ENVIRONMENT=production` - `WEB_CONCURRENCY=2` - `CORS_ORIGINS=https://gitdiagram.com,https://www.gitdiagram.com,https://YOUR_VERCEL_DOMAIN` Optional: - `GITHUB_PAT` (higher GitHub API rate limits for repository fetches) - `GITHUB_CLIENT_ID` - `GITHUB_PRIVATE_KEY` - `GITHUB_INSTALLATION_ID` - `API_ANALYTICS_KEY` Set variables via CLI: ```bash railway variables --service gitdiagram-api --set "OPENAI_API_KEY=..." railway variables --service gitdiagram-api --set "OPENAI_MODEL=gpt-5.4-mini" railway variables --service gitdiagram-api --set "ENVIRONMENT=production" railway variables --service gitdiagram-api --set "WEB_CONCURRENCY=2" railway variables --service gitdiagram-api --set "CORS_ORIGINS=https://gitdiagram.com,https://www.gitdiagram.com,https://YOUR_VERCEL_DOMAIN" ``` Do not set `PORT` manually unless needed. Railway injects it automatically. 
## 4) Deploy backend from `backend/` ```bash cd /path/to/gitdiagram railway up --service gitdiagram-api --path-as-root backend ``` ## 5) Create a public Railway domain ```bash railway domain --service gitdiagram-api ``` Copy the generated URL, for example: `https://gitdiagram-api-production-xxxx.up.railway.app` ## 6) Point Vercel frontend to Railway backend In your Vercel project environment variables, set: - `NEXT_PUBLIC_USE_LEGACY_BACKEND=true` - `NEXT_PUBLIC_API_DEV_URL=https://YOUR_RAILWAY_DOMAIN` Then redeploy Vercel. Note: the variable name includes "LEGACY" for backward compatibility, but this is now the primary external backend path. ## 7) Verify 1. Health endpoint: - `GET https://YOUR_RAILWAY_DOMAIN/healthz` - expected JSON: `{"ok": true, "status": "ok"}` 2. Open your frontend and generate a diagram. 3. Check Railway logs: ```bash railway logs --service gitdiagram-api ``` ================================================ FILE: drizzle.config.ts ================================================ import { type Config } from "drizzle-kit"; import { env } from "~/env"; export default { schema: "./src/server/db/schema.ts", dialect: "postgresql", dbCredentials: { url: env.POSTGRES_URL, }, tablesFilter: ["gitdiagram_*"], } satisfies Config; ================================================ FILE: eslint.config.mjs ================================================ import nextCoreVitals from "eslint-config-next/core-web-vitals"; import nextTypescript from "eslint-config-next/typescript"; import drizzle from "eslint-plugin-drizzle"; import tseslint from "@typescript-eslint/eslint-plugin"; const config = [ ...nextCoreVitals, ...nextTypescript, { ignores: [ ".next/**", "node_modules/**", "backend/**", "dist/**", "coverage/**", "next-env.d.ts", ], }, { files: ["**/*.{ts,tsx}"], plugins: { drizzle, "@typescript-eslint": tseslint, }, rules: { "@typescript-eslint/array-type": "off", "@typescript-eslint/consistent-type-definitions": "off", "@typescript-eslint/consistent-type-imports": [ "warn", { prefer: 
"type-imports", fixStyle: "inline-type-imports", }, ], "@typescript-eslint/no-require-imports": "off", "@typescript-eslint/no-unused-vars": [ "warn", { argsIgnorePattern: "^_", }, ], "@typescript-eslint/require-await": "off", "react-hooks/set-state-in-effect": "off", "drizzle/enforce-delete-with-where": [ "error", { drizzleObjectName: ["db", "ctx.db"], }, ], "drizzle/enforce-update-with-where": [ "error", { drizzleObjectName: ["db", "ctx.db"], }, ], }, }, ]; export default config; ================================================ FILE: next.config.js ================================================ /** * Run `build` or `dev` with `SKIP_ENV_VALIDATION` to skip env validation. This is especially useful * for Docker builds. */ import "./src/env.js"; /** @type {import("next").NextConfig} */ const config = { reactStrictMode: false, async rewrites() { return [ { source: "/phx9a/static/:path*", destination: "https://us-assets.i.posthog.com/static/:path*", }, { source: "/phx9a/:path*", destination: "https://us.i.posthog.com/:path*", }, ]; }, // This is required to support PostHog trailing slash API requests skipTrailingSlashRedirect: true, }; export default config; ================================================ FILE: package.json ================================================ { "name": "gitdiagram", "version": "0.1.0", "private": true, "type": "module", "scripts": { "build": "next build", "check": "pnpm lint && tsc --noEmit", "db:generate": "drizzle-kit generate", "db:migrate": "drizzle-kit migrate", "db:push": "drizzle-kit push", "db:studio": "drizzle-kit studio", "dev": "next dev --turbo", "dev:backend": "cd backend && ENVIRONMENT=development uv run --no-dev uvicorn app.main:app --host 0.0.0.0 --port 8000 --reload", "lint": "eslint . --ext .js,.jsx,.ts,.tsx", "lint:fix": "eslint . 
--ext .js,.jsx,.ts,.tsx --fix", "preview": "next build && next start", "start": "next start", "start:backend": "cd backend && ENVIRONMENT=production uv run --no-dev uvicorn app.main:app --host 0.0.0.0 --port ${PORT:-8000}", "test": "vitest run", "test:backend": "cd backend && uv run pytest -q", "test:watch": "vitest", "typecheck": "tsc --noEmit", "format:write": "prettier --write \"**/*.{ts,tsx,js,jsx,mdx}\" --cache", "format:check": "prettier --check \"**/*.{ts,tsx,js,jsx,mdx}\" --cache" }, "dependencies": { "@mermaid-js/layout-elk": "^0.2.1", "@neondatabase/serverless": "^1.0.2", "@radix-ui/react-dialog": "^1.1.15", "@radix-ui/react-progress": "^1.1.8", "@radix-ui/react-slot": "^1.2.4", "@radix-ui/react-switch": "^1.2.6", "@radix-ui/react-tooltip": "^1.2.8", "@t3-oss/env-nextjs": "^0.13.10", "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", "dompurify": "^3.3.1", "dotenv": "^17.3.1", "drizzle-orm": "^0.45.1", "geist": "^1.7.0", "ldrs": "^1.1.9", "lucide-react": "^0.574.0", "mermaid": "^11.12.3", "next": "^16.1.6", "next-themes": "^0.4.6", "openai": "^6.22.0", "postgres": "^3.4.8", "posthog-js": "^1.351.3", "react": "^19.2.4", "react-dom": "^19.2.4", "react-icons": "^5.5.0", "sonner": "^2.0.7", "svg-pan-zoom": "^3.6.2", "tailwind-merge": "^3.5.0", "tailwindcss-animate": "^1.0.7", "zod": "^4.3.6" }, "devDependencies": { "@tailwindcss/postcss": "4.2.0", "@testing-library/jest-dom": "6.9.1", "@testing-library/react": "16.3.2", "@types/eslint": "^9.6.1", "@types/node": "^25.3.0", "@types/react": "^19.2.14", "@types/react-dom": "^19.2.3", "@typescript-eslint/eslint-plugin": "^8.56.0", "@typescript-eslint/parser": "^8.56.0", "drizzle-kit": "^0.31.9", "eslint": "^9.39.2", "eslint-config-next": "^16.1.6", "eslint-plugin-drizzle": "^0.2.3", "jsdom": "26.1.0", "postcss": "^8.5.6", "prettier": "^3.8.1", "prettier-plugin-tailwindcss": "^0.7.2", "tailwind-scrollbar": "^4.0.2", "tailwindcss": "^4.2.0", "typescript": "^5.9.3", "vitest": "4.0.18" }, "ct3aMetadata": { 
/**
 * Build the page metadata for a repository diagram page.
 *
 * The route params arrive as a promise and are awaited before use. The
 * `username/repo` slug is interpolated into both the title and the
 * description exactly as received (no case normalisation happens here).
 *
 * @param props Route props whose `params` resolves to `{ username, repo }`.
 * @returns Metadata with a `title` and `description` for this repository.
 */
export async function generateMetadata({
  params,
}: RepoPageProps): Promise<Metadata> {
  const { username, repo } = await params;
  const slug = `${username}/${repo}`;

  return {
    title: `${slug} Diagram | GitDiagram`,
    description: `Interactive architecture diagram for ${slug}.`,
  };
}
}: RepoPageClientProps) { const [zoomingEnabled, setZoomingEnabled] = useState(false); useStarReminder(); const normalizedUsername = username.toLowerCase(); const normalizedRepo = repo.toLowerCase(); const { diagram, error, loading, lastGenerated, cost, showApiKeyDialog, handleCopy, handleApiKeySubmit, handleCloseApiKeyDialog, handleOpenApiKeyDialog, handleExportImage, handleRegenerate, state, } = useDiagram(normalizedUsername, normalizedRepo); return (
setZoomingEnabled((prev) => !prev)} loading={loading} />
{loading ? ( ) : error || state.error ? (

{error || state.error}

{state.parserError && (
                {state.parserError}
              
)} {(error?.includes("API key") || state.error?.includes("API key")) && (
)}
) : (
)}
/**
 * Fetch the cache row for one repository, if any.
 *
 * Shared by the two cache getters so the lookup query exists in one place.
 * Errors propagate to the caller, which decides how to log them.
 *
 * @returns The first matching row, or `undefined` when nothing is cached.
 */
async function findCachedRow(username: string, repo: string) {
  const rows = await db
    .select()
    .from(diagramCache)
    .where(
      and(eq(diagramCache.username, username), eq(diagramCache.repo, repo)),
    )
    .limit(1);

  return rows[0];
}

/**
 * Return the cached diagram for `username/repo`.
 *
 * @returns The stored diagram, or `null` when there is no row, the stored
 *   value is nullish, or the query failed (the failure is logged).
 */
export async function getCachedDiagram(username: string, repo: string) {
  try {
    const row = await findCachedRow(username, repo);
    return row?.diagram ?? null;
  } catch (error) {
    console.error("Error fetching cached diagram:", error);
    return null;
  }
}

/**
 * Return the cached explanation for `username/repo`.
 *
 * @returns The stored explanation, or `null` when there is no row, the stored
 *   value is nullish, or the query failed (the failure is logged).
 */
export async function getCachedExplanation(username: string, repo: string) {
  try {
    const row = await findCachedRow(username, repo);
    return row?.explanation ?? null;
  } catch (error) {
    console.error("Error fetching cached explanation:", error);
    return null;
  }
}

/**
 * Insert or refresh the cache row for `username/repo`.
 *
 * On a conflict on the `(username, repo)` target the existing row is updated
 * in place and `updatedAt` is set to the current time. Failures are logged
 * and swallowed, so callers never see a rejection from this function.
 *
 * @param usedOwnKey Whether the caller supplied their own API key; defaults
 *   to `false`.
 */
export async function cacheDiagramAndExplanation(
  username: string,
  repo: string,
  diagram: string,
  explanation: string,
  usedOwnKey = false,
) {
  try {
    await db
      .insert(diagramCache)
      .values({
        username,
        repo,
        diagram,
        explanation,
        usedOwnKey,
      })
      .onConflictDoUpdate({
        target: [diagramCache.username, diagramCache.repo],
        set: {
          diagram,
          explanation,
          usedOwnKey,
          updatedAt: new Date(),
        },
      });
  } catch (error) {
    console.error("Error caching diagram:", error);
  }
}

/**
 * Aggregate counts over the whole diagram cache.
 *
 * @returns An object with `totalDiagrams`, `ownKeyUsers` (rows where
 *   `usedOwnKey` is true) and `freeUsers` (rows where it is false), or
 *   `null` when the query failed (the failure is logged).
 */
export async function getDiagramStats() {
  try {
    const stats = await db
      .select({
        totalDiagrams: sql`COUNT(*)`,
        ownKeyUsers: sql`COUNT(CASE WHEN ${diagramCache.usedOwnKey} = true THEN 1 END)`,
        freeUsers: sql`COUNT(CASE WHEN ${diagramCache.usedOwnKey} = false THEN 1 END)`,
      })
      .from(diagramCache);

    return stats[0];
  } catch (error) {
    console.error("Error getting diagram stats:", error);
    return null;
  }
}
/**
 * Estimate the input-token count of the first-stage prompt for a repository.
 *
 * Tries the model-backed counter first; if anything in that path throws,
 * falls back to the local estimate over the file tree and README joined by
 * a newline.
 */
async function estimateRepoInputTokens(
  model: string,
  fileTree: string,
  readme: string,
  apiKey?: string,
) {
  try {
    const taggedRepo = toTaggedMessage({ file_tree: fileTree, readme });
    const counted = await countInputTokens({
      model,
      systemPrompt: SYSTEM_FIRST_PROMPT,
      userPrompt: taggedRepo,
      apiKey,
      reasoningEffort: "medium",
    });
    return counted;
  } catch {
    return estimateTokens(`${fileTree}\n${readme}`);
  }
}

/**
 * POST handler: estimate the USD cost of generating a diagram.
 *
 * Always answers with a JSON body; failures are reported through
 * `ok: false` plus an `error_code` rather than through the HTTP status.
 */
export async function POST(request: Request) {
  try {
    const result = generateRequestSchema.safeParse(await request.json());
    if (!result.success) {
      return NextResponse.json({
        ok: false,
        error: "Invalid request payload.",
        error_code: "VALIDATION_ERROR",
      });
    }

    const input = result.data;
    const repoData = await getGithubData(
      input.username,
      input.repo,
      input.github_pat,
    );
    const model = getModel();

    const promptTokens = await estimateRepoInputTokens(
      model,
      repoData.fileTree,
      repoData.readme,
      input.api_key,
    );

    // The measured prompt is scaled by the multi-stage multiplier and padded
    // with a fixed overhead; the output side is a fixed estimate.
    const inputTokens =
      promptTokens * MULTI_STAGE_INPUT_MULTIPLIER + INPUT_OVERHEAD_TOKENS;
    const outputTokens = ESTIMATED_OUTPUT_TOKENS;

    const estimate = estimateTextTokenCostUsd(model, inputTokens, outputTokens);

    return NextResponse.json({
      ok: true,
      cost: `$${estimate.costUsd.toFixed(2)} USD`,
      model,
      pricing_model: estimate.pricingModel,
      estimated_input_tokens: inputTokens,
      estimated_output_tokens: outputTokens,
      pricing: {
        input_per_million_usd: estimate.pricing.inputPerMillionUsd,
        output_per_million_usd: estimate.pricing.outputPerMillionUsd,
      },
    });
  } catch (error) {
    const message =
      error instanceof Error
        ? error.message
        : "Failed to estimate generation cost.";

    return NextResponse.json({
      ok: false,
      error: message,
      error_code: "COST_ESTIMATION_FAILED",
    });
  }
}
/**
 * Estimate the input-token count of the first-stage prompt for a repository.
 *
 * Tries the model-backed counter first; if anything in that path throws,
 * falls back to the local estimate over the file tree and README joined by
 * a newline.
 */
async function estimateRepoTokenCount(
  model: string,
  fileTree: string,
  readme: string,
  apiKey?: string,
) {
  try {
    return await countInputTokens({
      model,
      systemPrompt: SYSTEM_FIRST_PROMPT,
      userPrompt: toTaggedMessage({
        file_tree: fileTree,
        readme,
      }),
      apiKey,
      reasoningEffort: "medium",
    });
  } catch {
    // Counting is best effort; the local estimate keeps generation going.
    return estimateTokens(`${fileTree}\n${readme}`);
  }
}

/** Above this many estimated tokens the caller must supply their own API key. */
const FREE_TIER_TOKEN_LIMIT = 50000;

/** Above this many estimated tokens the repository is rejected outright. */
const MAX_REPO_TOKEN_LIMIT = 195000;

/**
 * POST handler: run the multi-stage diagram generation and stream progress
 * to the client as server-sent events.
 *
 * Stages, in order: repository explanation, component mapping, Mermaid
 * diagram generation, then up to `MAX_MERMAID_FIX_ATTEMPTS` syntax-repair
 * rounds while validation keeps failing. The final event is either
 * `status: "complete"` (with diagram, explanation and mapping) or
 * `status: "error"` (with an `error_code`).
 *
 * @returns A 400 JSON response when the body is not valid JSON or does not
 *   match the request schema; otherwise a `text/event-stream` response.
 */
export async function POST(request: Request) {
  // A malformed JSON body makes `request.json()` reject. Map that onto the
  // same 400 as a schema mismatch instead of letting it escape the handler.
  const body: unknown = await request.json().catch(() => null);
  const parsed = generateRequestSchema.safeParse(body);

  if (!parsed.success) {
    return new Response(
      JSON.stringify({
        ok: false,
        error: "Invalid request payload.",
        error_code: "VALIDATION_ERROR",
      }),
      { status: 400, headers: { "Content-Type": "application/json" } },
    );
  }

  const { username, repo, api_key: apiKey, github_pat: githubPat } = parsed.data;
  const encoder = new TextEncoder();

  const stream = new ReadableStream<Uint8Array>({
    start(controller) {
      let closed = false;

      // Closing an already-closed (or consumer-cancelled) controller throws,
      // so every exit path goes through this single idempotent close.
      const closeStream = () => {
        if (closed) return;
        closed = true;
        try {
          controller.close();
        } catch {
          // The consumer already tore the stream down; nothing left to close.
        }
      };

      const send = (payload: Record<string, unknown>) => {
        controller.enqueue(encoder.encode(sseMessage(payload)));
      };

      const run = async () => {
        try {
          const githubData = await getGithubData(username, repo, githubPat);
          const model = getModel();
          const tokenCount = await estimateRepoTokenCount(
            model,
            githubData.fileTree,
            githubData.readme,
            apiKey,
          );

          send({
            status: "started",
            message: "Starting generation process...",
          });

          // Hard ceiling first, so the free-tier check below needs no upper
          // bound and a count of exactly MAX_REPO_TOKEN_LIMIT without a key
          // can no longer slip past both checks.
          if (tokenCount > MAX_REPO_TOKEN_LIMIT) {
            send({
              status: "error",
              error:
                "Repository is too large (>195k tokens) for analysis. Try a smaller repo.",
              error_code: "TOKEN_LIMIT_EXCEEDED",
            });
            return; // `finally` closes the stream.
          }

          if (tokenCount > FREE_TIER_TOKEN_LIMIT && !apiKey) {
            send({
              status: "error",
              error:
                "File tree and README combined exceeds token limit (50,000). This repository is too large for free generation. Provide your own OpenAI API key to continue.",
              error_code: "API_KEY_REQUIRED",
            });
            return; // `finally` closes the stream.
          }

          // Stage 1: repository explanation.
          send({
            status: "explanation_sent",
            message: `Sending explanation request to ${model}...`,
          });
          await sleep(80);
          send({
            status: "explanation",
            message: "Analyzing repository structure...",
          });

          let explanation = "";
          for await (const chunk of streamCompletion({
            model,
            systemPrompt: SYSTEM_FIRST_PROMPT,
            userPrompt: toTaggedMessage({
              file_tree: githubData.fileTree,
              readme: githubData.readme,
            }),
            apiKey,
            reasoningEffort: "medium",
          })) {
            explanation += chunk;
            send({ status: "explanation_chunk", chunk });
          }

          // Stage 2: map components to paths in the file tree.
          send({
            status: "mapping_sent",
            message: `Sending component mapping request to ${model}...`,
          });
          await sleep(80);
          send({
            status: "mapping",
            message: "Creating component mapping...",
          });

          let fullMappingResponse = "";
          for await (const chunk of streamCompletion({
            model,
            systemPrompt: SYSTEM_SECOND_PROMPT,
            userPrompt: toTaggedMessage({
              explanation,
              file_tree: githubData.fileTree,
            }),
            apiKey,
            reasoningEffort: "low",
          })) {
            fullMappingResponse += chunk;
            send({ status: "mapping_chunk", chunk });
          }

          const componentMapping = extractComponentMapping(fullMappingResponse);

          // Stage 3: generate the Mermaid diagram.
          send({
            status: "diagram_sent",
            message: `Sending diagram generation request to ${model}...`,
          });
          await sleep(80);
          send({
            status: "diagram",
            message: "Generating diagram...",
          });

          let mermaidCode = "";
          for await (const chunk of streamCompletion({
            model,
            systemPrompt: SYSTEM_THIRD_PROMPT,
            userPrompt: toTaggedMessage({
              explanation,
              component_mapping: componentMapping,
            }),
            apiKey,
            reasoningEffort: "low",
          })) {
            mermaidCode += chunk;
            send({ status: "diagram_chunk", chunk });
          }

          // Stage 4: validate, and repair while validation keeps failing.
          let candidateDiagram = stripMermaidCodeFences(mermaidCode);
          let validationResult = await validateMermaidSyntax(candidateDiagram);
          const hadFixLoop = !validationResult.valid;

          if (!validationResult.valid) {
            const parserFeedback = formatValidationFeedback(validationResult);
            send({
              status: "diagram_fixing",
              message:
                "Diagram generated. Mermaid syntax validation failed, starting auto-fix loop...",
              parser_error: parserFeedback,
            });
          }

          for (
            let attempt = 1;
            !validationResult.valid && attempt <= MAX_MERMAID_FIX_ATTEMPTS;
            attempt++
          ) {
            const parserFeedback = formatValidationFeedback(validationResult);
            send({
              status: "diagram_fix_attempt",
              message: `Fixing Mermaid syntax (attempt ${attempt}/${MAX_MERMAID_FIX_ATTEMPTS})...`,
              fix_attempt: attempt,
              fix_max_attempts: MAX_MERMAID_FIX_ATTEMPTS,
              parser_error: parserFeedback,
            });

            let repairedDiagram = "";
            for await (const chunk of streamCompletion({
              model,
              systemPrompt: SYSTEM_FIX_MERMAID_PROMPT,
              userPrompt: toTaggedMessage({
                mermaid_code: candidateDiagram,
                parser_error: parserFeedback,
                explanation,
                component_mapping: componentMapping,
              }),
              apiKey,
              reasoningEffort: "low",
            })) {
              repairedDiagram += chunk;
              send({
                status: "diagram_fix_chunk",
                chunk,
                fix_attempt: attempt,
                fix_max_attempts: MAX_MERMAID_FIX_ATTEMPTS,
              });
            }

            candidateDiagram = stripMermaidCodeFences(repairedDiagram);
            send({
              status: "diagram_fix_validating",
              message: `Validating Mermaid syntax after attempt ${attempt}/${MAX_MERMAID_FIX_ATTEMPTS}...`,
              fix_attempt: attempt,
              fix_max_attempts: MAX_MERMAID_FIX_ATTEMPTS,
            });
            validationResult = await validateMermaidSyntax(candidateDiagram);
          }

          if (!validationResult.valid) {
            send({
              status: "error",
              error:
                "Generated Mermaid remained syntactically invalid after auto-fix attempts. Please retry generation.",
              error_code: "MERMAID_SYNTAX_UNRESOLVED",
              parser_error: formatValidationFeedback(validationResult),
            });
            return;
          }

          // Rewrite click targets for this repo and its default branch.
          const processedDiagram = processClickEvents(
            candidateDiagram,
            username,
            repo,
            githubData.defaultBranch,
          );

          if (hadFixLoop) {
            send({
              status: "diagram_fixing",
              message: "Mermaid syntax validated. Finalizing diagram output...",
            });
          }

          send({
            status: "complete",
            diagram: processedDiagram,
            explanation,
            mapping: componentMapping,
          });
        } catch (error) {
          try {
            send({
              status: "error",
              error:
                error instanceof Error
                  ? error.message
                  : "Streaming generation failed.",
              error_code: "STREAM_FAILED",
            });
          } catch {
            // Enqueueing can itself fail once the consumer is gone; there is
            // nobody left to notify, so just fall through to the close.
          }
        } finally {
          closeStream();
        }
      };

      // Not awaited: `start` returns at once and events flow as they happen.
      void run();
    },
  });

  return new Response(stream, {
    headers: {
      "Content-Type": "text/event-stream; charset=utf-8",
      "Cache-Control": "no-cache, no-transform",
      Connection: "keep-alive",
      "X-Accel-Buffering": "no",
    },
  });
}
"https://gitdiagram.com", title: "GitDiagram - Repository to Diagram in Seconds", description: "Turn any GitHub repository into an interactive diagram for visualization.", siteName: "GitDiagram", images: [ { url: "/og-image.png", // You'll need to create this image width: 1200, height: 630, alt: "GitDiagram - Repository Visualization Tool", }, ], }, robots: { index: true, follow: true, googleBot: { index: true, follow: true, "max-snippet": -1, }, }, }; export default function RootLayout({ children, }: Readonly<{ children: React.ReactNode }>) { return (
{children}