Repository: AsyncFuncAI/deepwiki-open
Branch: main
Commit: 4c6a1f7899ae
Files: 109
Total size: 1.0 MB

Directory structure:
gitextract_27f935c4/

├── .dockerignore
├── .github/
│   └── workflows/
│       └── docker-build-push.yml
├── .gitignore
├── .python-version
├── .vscode/
│   └── launch.json
├── Dockerfile
├── Dockerfile-ollama-local
├── LICENSE
├── Ollama-instruction.md
├── README.es.md
├── README.fr.md
├── README.ja.md
├── README.kr.md
├── README.md
├── README.pt-br.md
├── README.ru.md
├── README.vi.md
├── README.zh-tw.md
├── README.zh.md
├── api/
│   ├── README.md
│   ├── __init__.py
│   ├── api.py
│   ├── azureai_client.py
│   ├── bedrock_client.py
│   ├── config/
│   │   ├── embedder.json
│   │   ├── embedder.json.bak
│   │   ├── embedder.ollama.json.bak
│   │   ├── embedder.openai_compatible.json.bak
│   │   ├── generator.json
│   │   ├── lang.json
│   │   └── repo.json
│   ├── config.py
│   ├── dashscope_client.py
│   ├── data_pipeline.py
│   ├── google_embedder_client.py
│   ├── logging_config.py
│   ├── main.py
│   ├── ollama_patch.py
│   ├── openai_client.py
│   ├── openrouter_client.py
│   ├── prompts.py
│   ├── pyproject.toml
│   ├── rag.py
│   ├── simple_chat.py
│   ├── tools/
│   │   └── embedder.py
│   └── websocket_wiki.py
├── docker-compose.yml
├── eslint.config.mjs
├── next.config.ts
├── package.json
├── postcss.config.mjs
├── pytest.ini
├── run.sh
├── src/
│   ├── app/
│   │   ├── [owner]/
│   │   │   └── [repo]/
│   │   │       ├── page.tsx
│   │   │       ├── slides/
│   │   │       │   └── page.tsx
│   │   │       └── workshop/
│   │   │           └── page.tsx
│   │   ├── api/
│   │   │   ├── auth/
│   │   │   │   ├── status/
│   │   │   │   │   └── route.ts
│   │   │   │   └── validate/
│   │   │   │       └── route.ts
│   │   │   ├── chat/
│   │   │   │   └── stream/
│   │   │   │       └── route.ts
│   │   │   ├── models/
│   │   │   │   └── config/
│   │   │   │       └── route.ts
│   │   │   └── wiki/
│   │   │       └── projects/
│   │   │           └── route.ts
│   │   ├── globals.css
│   │   ├── layout.tsx
│   │   ├── page.tsx
│   │   └── wiki/
│   │       └── projects/
│   │           └── page.tsx
│   ├── components/
│   │   ├── Ask.tsx
│   │   ├── ConfigurationModal.tsx
│   │   ├── Markdown.tsx
│   │   ├── Mermaid.tsx
│   │   ├── ModelSelectionModal.tsx
│   │   ├── ProcessedProjects.tsx
│   │   ├── TokenInput.tsx
│   │   ├── UserSelector.tsx
│   │   ├── WikiTreeView.tsx
│   │   ├── WikiTypeSelector.tsx
│   │   └── theme-toggle.tsx
│   ├── contexts/
│   │   └── LanguageContext.tsx
│   ├── hooks/
│   │   └── useProcessedProjects.ts
│   ├── i18n.ts
│   ├── messages/
│   │   ├── en.json
│   │   ├── es.json
│   │   ├── fr.json
│   │   ├── ja.json
│   │   ├── kr.json
│   │   ├── pt-br.json
│   │   ├── ru.json
│   │   ├── vi.json
│   │   ├── zh-tw.json
│   │   └── zh.json
│   ├── types/
│   │   ├── repoinfo.tsx
│   │   └── wiki/
│   │       ├── wikipage.tsx
│   │       └── wikistructure.tsx
│   └── utils/
│       ├── getRepoUrl.tsx
│       ├── urlDecoder.tsx
│       └── websocketClient.ts
├── tailwind.config.js
├── test/
│   ├── __init__.py
│   └── test_extract_repo_name.py
├── tests/
│   ├── README.md
│   ├── __init__.py
│   ├── api/
│   │   ├── __init__.py
│   │   └── test_api.py
│   ├── integration/
│   │   ├── __init__.py
│   │   └── test_full_integration.py
│   ├── run_tests.py
│   └── unit/
│       ├── __init__.py
│       ├── test_all_embedders.py
│       └── test_google_embedder.py
└── tsconfig.json

================================================
FILE CONTENTS
================================================

================================================
FILE: .dockerignore
================================================
# Git
.git
.gitignore
.github

# Node.js
node_modules
npm-debug.log
yarn-debug.log
yarn-error.log

# Next.js
.next
out

# Python cache files (but keep api/ directory)
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# Keep api/ directory but exclude cache
api/__pycache__/
api/*.pyc

# Environment variables
# .env is now allowed to be included in the build
.env.local
.env.development.local
.env.test.local
.env.production.local

# Docker
Dockerfile
docker-compose.yml
.dockerignore

# Misc
.DS_Store
*.pem
README.md
LICENSE
screenshots/
*.md
!api/README.md


================================================
FILE: .github/workflows/docker-build-push.yml
================================================
name: Build and Push Docker Image

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
  # Allow manual trigger
  workflow_dispatch:

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}
  
concurrency:
  # This concurrency group ensures that only one job in the group runs at a time.
  # If a new job is triggered, the previous one will be canceled.
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
  
jobs:
  build-and-push:
    strategy:
      matrix:
        include:
          - os: ubuntu-latest
            platform: linux/amd64
          - os: ubuntu-24.04-arm
            platform: linux/arm64
    runs-on: ${{ matrix.os }}
    permissions:
      contents: read
      packages: write

    steps:
      - name: Prepare environment for current platform
        id: prepare
        # Exports two variables to later steps of this job through $GITHUB_ENV:
        #   PLATFORM_PAIR - the matrix platform with every "/" replaced by "-"
        #                   (bash `${var//\//-}`), e.g. linux/amd64 -> linux-amd64;
        #                   used to name the uploaded digest artifact.
        #   GHCR_IMAGE    - ghcr.io/<owner>/<repo>, lowercased with the bash
        #                   `${var@L}` case transformation.
        run: |
          platform=${{ matrix.platform }}
          echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
          echo "GHCR_IMAGE=ghcr.io/${GITHUB_REPOSITORY@L}" >> $GITHUB_ENV
      
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log in to the Container registry
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata (tags, labels) for Docker
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.GHCR_IMAGE }}

      - name: Create empty .env file for build
        run: touch .env

      - name: Build and push Docker image
        uses: docker/build-push-action@v6
        id: build
        with:
          context: .
          platforms: ${{ matrix.platform }}
          push: ${{ github.event_name != 'pull_request' }}
          annotations: ${{ steps.meta.outputs.annotations }}
          labels: ${{ steps.meta.outputs.labels }}
          outputs: type=image,name=${{ env.GHCR_IMAGE }},push-by-digest=true,name-canonical=true,push=${{ github.event_name != 'pull_request' }},oci-mediatypes=true
          cache-from: type=gha,scope=${{ github.repository }}-${{ github.ref_name }}-${{ matrix.platform }}
          cache-to: type=gha,mode=max,scope=${{ github.repository }}-${{ github.ref_name }}-${{ matrix.platform }}
          
      - name: Export digest
        run: |
          mkdir -p /tmp/digests
          digest="${{ steps.build.outputs.digest }}"
          touch "/tmp/digests/${digest#sha256:}"

      - name: Upload digest 
        uses: actions/upload-artifact@v4
        with:
          name: digests-${{ env.PLATFORM_PAIR }}
          path: /tmp/digests/*
          if-no-files-found: error
          retention-days: 1
  
  merge:
    name: merge Docker manifests
    runs-on: ubuntu-latest
    if: github.event_name != 'pull_request'
    permissions:
      contents: read
      packages: write

    needs:
      - build-and-push
    steps:
      - name: Prepare environment
        id: prepare
        run: |
          echo "GHCR_IMAGE=ghcr.io/${GITHUB_REPOSITORY@L}" >> $GITHUB_ENV
      
      - name: Download digests
        uses: actions/download-artifact@v4
        with:
          path: /tmp/digests
          pattern: digests-*
          merge-multiple: true


      # Computes the final tag list for the multi-arch manifest. The result is
      # read by the manifest steps below through $DOCKER_METADATA_OUTPUT_JSON.
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.GHCR_IMAGE }}
          # Custom annotations are plain `key=value` lines; the `type=...,value=...`
          # syntax is only understood by the `tags` input.
          annotations: |
            org.opencontainers.image.description=${{ github.event.repository.description || 'No description provided' }}
          # NOTE: this job is skipped for pull_request events (see the job-level
          # `if`), so the `type=ref,event=pr` rule never produces a tag here.
          tags: |
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=sha,format=short
            type=ref,event=branch
            type=ref,event=pr
            latest

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
        with:
          driver-opts: |
            network=host

      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Get execution timestamp with RFC3339 format
        id: timestamp
        run: |
          echo "timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")" >> $GITHUB_OUTPUT

      # Assembles the per-platform digests (one empty file per digest in
      # /tmp/digests) into a single annotated manifest list and pushes it under
      # every tag computed by the "Docker meta" step. Failure is tolerated so the
      # following step can retry without annotations.
      - name: Create manifest list and push
        working-directory: /tmp/digests
        id: manifest-annotate
        continue-on-error: true
        env:
          # Repository metadata is passed through the environment instead of being
          # interpolated into the script text: the description is free-form user
          # input, and a quote character in it would otherwise break (or inject
          # into) the shell command.
          REPO_DESCRIPTION: ${{ github.event.repository.description || 'No description provided' }}
          # html_url is the browsable repository URL for every event type.
          REPO_URL: ${{ github.event.repository.html_url }}
          CREATED_AT: ${{ steps.timestamp.outputs.timestamp }}
        run: |
          docker buildx imagetools create \
            $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
            --annotation="index:org.opencontainers.image.description=${REPO_DESCRIPTION}" \
            --annotation="index:org.opencontainers.image.created=${CREATED_AT}" \
            --annotation="index:org.opencontainers.image.url=${REPO_URL}" \
            --annotation="index:org.opencontainers.image.source=${REPO_URL}" \
            $(printf '${{ env.GHCR_IMAGE }}@sha256:%s ' *)

      - name: Create manifest list and push without annotations
        if: steps.manifest-annotate.outcome == 'failure'
        working-directory: /tmp/digests
        run: |
              docker buildx imagetools create  $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
                $(printf '${{ env.GHCR_IMAGE }}@sha256:%s ' *)

      - name: Inspect image
        id: inspect
        run: |
          docker buildx imagetools inspect '${{ env.GHCR_IMAGE }}:${{ steps.meta.outputs.version }}'


================================================
FILE: .gitignore
================================================
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.

# dependencies
/node_modules
/.pnp
.pnp.*
.yarn/*
!.yarn/patches
!.yarn/plugins
!.yarn/releases
!.yarn/versions

# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
api/logs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
*.venv
# testing
/coverage

# next.js
/.next/
/out/

# production
/build

# misc
.DS_Store
*.pem

# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*
.pnpm-debug.log*

# env files (can opt-in for committing if needed)
.env*

# vercel
.vercel

# typescript
*.tsbuildinfo
next-env.d.ts

.idea/

# ignore adding self-signed certs
certs/


================================================
FILE: .python-version
================================================
3.12


================================================
FILE: .vscode/launch.json
================================================
{
    "version": "0.2.0",
    "configurations": [
        {
            "name": "Deepwiki-Open",
            "type": "python",
            "request": "launch",
            "module": "uvicorn",
            "args": [
                "api.api:app",
                "--reload",
                "--port",
                "8001"
            ],
            "jinja": true,
            "justMyCode": true
        }
    ]
}


================================================
FILE: Dockerfile
================================================
# syntax=docker/dockerfile:1-labs

# Build argument for custom certificates directory
ARG CUSTOM_CERT_DIR="certs"

FROM node:20-alpine3.22 AS node_base

FROM node_base AS node_deps
WORKDIR /app
COPY package.json package-lock.json ./
RUN npm ci --legacy-peer-deps

FROM node_base AS node_builder
WORKDIR /app
COPY --from=node_deps /app/node_modules ./node_modules
# Copy only necessary files for Next.js build
COPY package.json package-lock.json next.config.ts tsconfig.json tailwind.config.js postcss.config.mjs ./
COPY src/ ./src/
COPY public/ ./public/
# Increase Node.js memory limit for build and disable telemetry
ENV NODE_OPTIONS="--max-old-space-size=4096"
ENV NEXT_TELEMETRY_DISABLED=1
RUN NODE_ENV=production npm run build

FROM python:3.11-slim AS py_deps
WORKDIR /api
COPY api/pyproject.toml .
COPY api/poetry.lock .
RUN python -m pip install poetry==2.0.1 --no-cache-dir && \
    poetry config virtualenvs.create true --local && \
    poetry config virtualenvs.in-project true --local && \
    poetry config virtualenvs.options.always-copy --local true && \
    POETRY_MAX_WORKERS=10 poetry install --no-interaction --no-ansi --only main && \
    poetry cache clear --all .

# Use Python 3.11 as final image
FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Install Node.js and npm
RUN apt-get update && apt-get install -y \
    curl \
    gnupg \
    git \
    ca-certificates \
    && mkdir -p /etc/apt/keyrings \
    && curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
    && echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list \
    && apt-get update \
    && apt-get install -y nodejs \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install custom CA certificates when the build context provides them.
#
# CUSTOM_CERT_DIR is declared before the first FROM, which places it outside of
# every build stage. It has to be re-declared (without a value) inside this stage
# to inherit its default; otherwise it expands to an empty string and the block
# below is always skipped.
ARG CUSTOM_CERT_DIR

# The build context is bind-mounted for the duration of this RUN only, so the
# certificate directory can be inspected without a COPY instruction that would
# fail the build when the directory does not exist.
RUN --mount=type=bind,target=/tmp/build-context \
    if [ -n "${CUSTOM_CERT_DIR}" ]; then \
        cert_src="/tmp/build-context/${CUSTOM_CERT_DIR}"; \
        mkdir -p /usr/local/share/ca-certificates && \
        if [ -d "${cert_src}" ]; then \
            cp -r "${cert_src}"/* /usr/local/share/ca-certificates/ 2>/dev/null || true; \
            update-ca-certificates; \
            echo "Custom certificates installed successfully."; \
        else \
            echo "Warning: ${CUSTOM_CERT_DIR} not found. Skipping certificate installation."; \
        fi \
    fi

ENV PATH="/opt/venv/bin:$PATH"

# Copy Python dependencies
COPY --from=py_deps /api/.venv /opt/venv
COPY api/ ./api/

# Copy Node app
COPY --from=node_builder /app/public ./public
COPY --from=node_builder /app/.next/standalone ./
COPY --from=node_builder /app/.next/static ./.next/static

# Expose the port the app runs on
EXPOSE ${PORT:-8001} 3000

# Create a script to run both backend and frontend
RUN echo '#!/bin/bash\n\
# Load environment variables from .env file if it exists\n\
if [ -f .env ]; then\n\
  export $(grep -v "^#" .env | xargs -r)\n\
fi\n\
\n\
# Check for required environment variables\n\
if [ -z "$OPENAI_API_KEY" ] || [ -z "$GOOGLE_API_KEY" ]; then\n\
  echo "Warning: OPENAI_API_KEY and/or GOOGLE_API_KEY environment variables are not set."\n\
  echo "These are required for DeepWiki to function properly."\n\
  echo "You can provide them via a mounted .env file or as environment variables when running the container."\n\
fi\n\
\n\
# Start the API server in the background with the configured port\n\
python -m api.main --port ${PORT:-8001} &\n\
PORT=3000 HOSTNAME=0.0.0.0 node server.js &\n\
wait -n\n\
exit $?' > /app/start.sh && chmod +x /app/start.sh

# Set environment variables
ENV PORT=8001
ENV NODE_ENV=production
ENV SERVER_BASE_URL=http://localhost:${PORT:-8001}

# Create empty .env file (will be overridden if one exists at runtime)
RUN touch .env

# Command to run the application
CMD ["/app/start.sh"]


================================================
FILE: Dockerfile-ollama-local
================================================
# syntax=docker/dockerfile:1-labs

FROM node:20-alpine AS node_base

FROM node_base AS node_deps
WORKDIR /app
COPY package.json package-lock.json ./
RUN npm ci --legacy-peer-deps

FROM node_base AS node_builder
WORKDIR /app
COPY --from=node_deps /app/node_modules ./node_modules
COPY --exclude=./api . .
RUN NODE_ENV=production npm run build

FROM python:3.11-slim AS py_deps
WORKDIR /api
COPY api/pyproject.toml .
COPY api/poetry.lock .
RUN python -m pip install poetry==2.0.1 --no-cache-dir && \
    poetry config virtualenvs.create true --local && \
    poetry config virtualenvs.in-project true --local && \
    poetry config virtualenvs.options.always-copy --local true && \
    POETRY_MAX_WORKERS=10 poetry install --no-interaction --no-ansi --only main && \
    poetry cache clear --all .

FROM python:3.11-slim AS ollama_base
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl zstd && rm -rf /var/lib/apt/lists/*

# Download the Ollama release matching the image's target architecture.
#
# TARGETARCH is one of BuildKit's automatic platform arguments. Declaring it
# WITHOUT a default lets the automatically detected value through; a default
# declared here would take precedence over it and pin every build to that
# architecture. It can still be overridden explicitly with
# --build-arg TARGETARCH=arm64 or --build-arg TARGETARCH=amd64.
ARG TARGETARCH
RUN OLLAMA_ARCH="" && \
    if [ "$TARGETARCH" = "arm64" ]; then \
        echo "Building for ARM64 architecture." && \
        OLLAMA_ARCH="arm64"; \
    elif [ "$TARGETARCH" = "amd64" ]; then \
        echo "Building for AMD64 architecture." && \
        OLLAMA_ARCH="amd64"; \
    else \
        echo "Error: Unsupported architecture '$TARGETARCH'. Supported architectures are 'arm64' and 'amd64'." >&2 && \
        exit 1; \
    fi && \
    # Download to a file and extract in a separate command so that a failed
    # download aborts the build without depending on `set -o pipefail`, which
    # the /bin/sh used by RUN is not guaranteed to support.
    curl -fL "https://ollama.com/download/ollama-linux-${OLLAMA_ARCH}.tar.zst" \
      -o /tmp/ollama.tar.zst && \
    tar --zstd -xf /tmp/ollama.tar.zst -C /usr && \
    rm -f /tmp/ollama.tar.zst

# Pre-download the embedding and generation models at build time.
# `ollama serve` is started in the background (output discarded), the fixed
# 20-second sleep gives it time to start before the pulls are issued, and the
# two `ollama pull` commands then run in sequence. The final stage copies
# /root/.ollama out of this stage.
RUN ollama serve > /dev/null 2>&1 & \
    sleep 20 && \
    ollama pull nomic-embed-text && \
    ollama pull qwen3:1.7b

# Use Python 3.11 as final image
FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Install Node.js and npm
RUN apt-get update && apt-get install -y \
    curl \
    gnupg \
    git \
    && mkdir -p /etc/apt/keyrings \
    && curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
    && echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list \
    && apt-get update \
    && apt-get install -y nodejs \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

ENV PATH="/opt/venv/bin:$PATH"

# Copy Python dependencies
COPY --from=py_deps /api/.venv /opt/venv
COPY api/ ./api/

# Copy Node app
COPY --from=node_builder /app/public ./public
COPY --from=node_builder /app/.next/standalone ./
COPY --from=node_builder /app/.next/static ./.next/static
COPY --from=ollama_base /usr/bin/ollama /usr/local/bin/
COPY --from=ollama_base /root/.ollama /root/.ollama

# Expose the port the app runs on
EXPOSE ${PORT:-8001} 3000

# Create a script to run both backend and frontend
RUN echo '#!/bin/bash\n\
# Start ollama serve in background\n\
ollama serve > /dev/null 2>&1 &\n\
\n\
# Load environment variables from .env file if it exists\n\
if [ -f .env ]; then\n\
  export $(grep -v "^#" .env | xargs -r)\n\
fi\n\
\n\
# Check for required environment variables\n\
if [ -z "$OPENAI_API_KEY" ] || [ -z "$GOOGLE_API_KEY" ]; then\n\
  echo "Warning: OPENAI_API_KEY and/or GOOGLE_API_KEY environment variables are not set."\n\
  echo "These are required for DeepWiki to function properly."\n\
  echo "You can provide them via a mounted .env file or as environment variables when running the container."\n\
fi\n\
\n\
# Start the API server in the background with the configured port\n\
python -m api.main --port ${PORT:-8001} &\n\
PORT=3000 HOSTNAME=0.0.0.0 node server.js &\n\
wait -n\n\
exit $?' > /app/start.sh && chmod +x /app/start.sh

# Set environment variables
ENV PORT=8001
ENV NODE_ENV=production
ENV SERVER_BASE_URL=http://localhost:${PORT:-8001}

# Create empty .env file (will be overridden if one exists at runtime)
RUN touch .env

# Command to run the application
CMD ["/app/start.sh"]


================================================
FILE: LICENSE
================================================
MIT License

Copyright (c) 2024 Sheing Ng

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: Ollama-instruction.md
================================================
# Using DeepWiki with Ollama: Beginner's Guide

DeepWiki supports local AI models through Ollama, which is perfect if you want to:

- Run everything locally without relying on cloud APIs
- Avoid API costs from OpenAI or Google
- Have more privacy with your code analysis

## Step 1: Install Ollama

### For Windows
- Download Ollama from the [official website](https://ollama.com/download)
- Run the installer and follow the on-screen instructions
- After installation, Ollama will run in the background (check your system tray)

### For macOS
- Download Ollama from the [official website](https://ollama.com/download)
- Open the downloaded file and drag Ollama to your Applications folder
- Launch Ollama from your Applications folder

### For Linux
- Run the following command:
  ```bash
  curl -fsSL https://ollama.com/install.sh | sh
  ```

## Step 2: Download Required Models

Open a terminal (Command Prompt or PowerShell on Windows) and run:

```bash
ollama pull nomic-embed-text
ollama pull qwen3:1.7b
```

The first command downloads the embedding model that DeepWiki uses to understand your code. The second downloads a small but capable language model for generating documentation.

## Step 3: Set Up DeepWiki

Clone the DeepWiki repository:
```bash
git clone https://github.com/AsyncFuncAI/deepwiki-open.git
cd deepwiki-open
```

Create a `.env` file in the project root:
```
# No need for API keys when using Ollama locally
PORT=8001
# Optionally, provide OLLAMA_HOST if Ollama is not local
OLLAMA_HOST=your_ollama_host # (default: http://localhost:11434)
```

Configure the Local Embedder for Ollama:
```
cp api/config/embedder.ollama.json.bak api/config/embedder.json
# overwrite api/config/embedder.json? (y/n [n]) y
```

Start the backend:
```bash
python -m pip install poetry==2.0.1 && poetry install -C api
python -m api.main
```

Start the frontend:
```bash
npm install
npm run dev
```

## Step 4: Use DeepWiki with Ollama

1. Open http://localhost:3000 in your browser
2. Enter a GitHub, GitLab, or Bitbucket repository URL
3. Check the "Local Ollama Model" option
4. Click "Generate Wiki"

![Ollama Option](screenshots/Ollama.png)

## Alternative using Dockerfile

1. Configure the Local Embedder for Ollama:
```
cp api/config/embedder.ollama.json.bak api/config/embedder.json
# overwrite api/config/embedder.json? (y/n [n]) y
```

2. Build the docker image `docker build -f Dockerfile-ollama-local -t deepwiki:ollama-local .`
3. Run the container:
   ```bash
   # For regular use
   docker run -p 3000:3000 -p 8001:8001 --name deepwiki \
     -v ~/.adalflow:/root/.adalflow \
     -e OLLAMA_HOST=your_ollama_host \
     deepwiki:ollama-local
   
   # For local repository analysis
   docker run -p 3000:3000 -p 8001:8001 --name deepwiki \
     -v ~/.adalflow:/root/.adalflow \
     -e OLLAMA_HOST=your_ollama_host \
     -v /path/to/your/repo:/app/local-repos/repo-name \
     deepwiki:ollama-local
   ```

4. When using local repositories in the interface: use `/app/local-repos/repo-name` as the local repository path.

5. Open http://localhost:3000 in your browser

Note: For Apple Silicon Macs, the Dockerfile automatically uses ARM64 binaries for better performance.

## How It Works

When you select "Use Local Ollama", DeepWiki will:

1. Use the `nomic-embed-text` model for creating embeddings of your code
2. Use the `qwen3:1.7b` model for generating documentation
3. Process everything locally on your machine

## Troubleshooting

### "Cannot connect to Ollama server"
- Make sure Ollama is running in the background. You can check by running `ollama list` in your terminal.
- Verify that Ollama is running on the default port (11434)
- Try restarting Ollama

### Slow generation
- Local models are typically slower than cloud APIs. Consider using a smaller repository or a more powerful computer.
- The `qwen3:1.7b` model offers a good balance between speed and quality. Larger models will be slower but may produce better results.

### Out of memory errors
- If you encounter memory issues, try using a smaller model like `phi3:mini` instead of larger models.
- Close other memory-intensive applications while running Ollama

## Advanced: Using Different Models

If you want to try different models, you can modify the `api/config/generator.json` file:

```python
"generator_ollama": {
    "model_client": OllamaClient,
    "model_kwargs": {
        "model": "qwen3:1.7b",  # Change this to another model
        "options": {
            "temperature": 0.7,
            "top_p": 0.8,
        }
    },
},
```

You can replace `"model": "qwen3:1.7b"` with any model you've pulled with Ollama. For a list of available models, visit [Ollama's model library](https://ollama.com/library) or run `ollama list` in your terminal.

Similarly, you can change the embedding model:

```python
"embedder_ollama": {
    "model_client": OllamaClient,
    "model_kwargs": {
        "model": "nomic-embed-text"  # Change this to another embedding model
    },
},
```

## Performance Considerations

### Hardware Requirements

For optimal performance with Ollama:
- **CPU**: 4+ cores recommended
- **RAM**: 8GB minimum, 16GB+ recommended
- **Storage**: 10GB+ free space for models
- **GPU**: Optional but highly recommended for faster processing

### Model Selection Guide

| Model | Size | Speed | Quality | Use Case |
|-------|------|-------|---------|----------|
| phi3:mini | 1.3GB | Fast | Good | Small projects, quick testing |
| qwen3:1.7b | 3.8GB | Medium | Better | Default, good balance |
| llama3:8b | 8GB | Slow | Best | Complex projects, detailed analysis |

## Limitations

When using Ollama with DeepWiki:

1. **No Internet Access**: The models run completely offline and cannot access external information
2. **Limited Context Window**: Local models typically have smaller context windows than cloud APIs
3. **Less Powerful**: Local models may not match the quality of the latest cloud models

## Conclusion

Using DeepWiki with Ollama gives you a completely local, private solution for code documentation. While it may not match the speed or quality of cloud-based solutions, it provides a free and privacy-focused alternative that works well for most projects.

Enjoy using DeepWiki with your local Ollama models!


================================================
FILE: README.es.md
================================================
# DeepWiki-Open

![Banner de DeepWiki](screenshots/Deepwiki.png)

**DeepWiki** crea automáticamente wikis hermosas e interactivas para cualquier repositorio de GitHub, GitLab o BitBucket. Solo ingresa el nombre de un repositorio y DeepWiki:

1. Analizará la estructura del código
2. Generará documentación completa
3. Creará diagramas visuales para explicar cómo funciona todo
4. Organizará todo en una wiki fácil de navegar

[!["Buy Me A Coffee"](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)](https://buymeacoffee.com/sheing)

[![Twitter/X](https://img.shields.io/badge/Twitter-1DA1F2?style=for-the-badge&logo=twitter&logoColor=white)](https://x.com/sashimikun_void)
[![Discord](https://img.shields.io/badge/Discord-7289DA?style=for-the-badge&logo=discord&logoColor=white)](https://discord.com/invite/VQMBGR8u5v)

[English](./README.md) | [简体中文](./README.zh.md) | [繁體中文](./README.zh-tw.md) | [日本語](./README.ja.md) | [Español](./README.es.md) | [한국어](./README.kr.md) | [Tiếng Việt](./README.vi.md) | [Português Brasileiro](./README.pt-br.md) | [Français](./README.fr.md) | [Русский](./README.ru.md)

## ✨ Características

- **Documentación Instantánea**: Convierte cualquier repositorio de GitHub, GitLab o BitBucket en una wiki en segundos
- **Soporte para Repositorios Privados**: Accede de forma segura a repositorios privados con tokens de acceso personal
- **Análisis Inteligente**: Comprensión de la estructura y relaciones del código impulsada por IA
- **Diagramas Hermosos**: Diagramas Mermaid automáticos para visualizar la arquitectura y el flujo de datos
- **Navegación Sencilla**: Interfaz simple e intuitiva para explorar la wiki
- **Función de Preguntas**: Chatea con tu repositorio usando IA potenciada por RAG para obtener respuestas precisas
- **Investigación Profunda**: Proceso de investigación de múltiples turnos que examina a fondo temas complejos
- **Múltiples Proveedores de Modelos**: Soporte para Google Gemini, OpenAI, OpenRouter y modelos locales de Ollama

## 🚀 Inicio Rápido (¡Súper Fácil!)

### Opción 1: Usando Docker

```bash
# Clonar el repositorio
git clone https://github.com/AsyncFuncAI/deepwiki-open.git
cd deepwiki-open

# Crear un archivo .env con tus claves API
echo "GOOGLE_API_KEY=your_google_api_key" > .env
echo "OPENAI_API_KEY=your_openai_api_key" >> .env
# Opcional: Añadir clave API de OpenRouter si quieres usar modelos de OpenRouter
echo "OPENROUTER_API_KEY=your_openrouter_api_key" >> .env

# Ejecutar con Docker Compose
docker-compose up
```

(Los comandos de Docker anteriores, así como la configuración de `docker-compose.yml`, montan el directorio `~/.adalflow` de tu host en `/root/.adalflow` dentro del contenedor. Esta ruta se utiliza para almacenar:
- Repositorios clonados (`~/.adalflow/repos/`)
- Sus embeddings e índices (`~/.adalflow/databases/`)
- Contenido de wiki generado y cacheado (`~/.adalflow/wikicache/`)

Esto asegura que tus datos persistan incluso si el contenedor se detiene o se elimina.)

> 💡 **Dónde obtener estas claves:**
> - Obtén una clave API de Google en [Google AI Studio](https://makersuite.google.com/app/apikey)
> - Obtén una clave API de OpenAI en [OpenAI Platform](https://platform.openai.com/api-keys)

### Opción 2: Configuración Manual (Recomendada)

#### Paso 1: Configurar tus Claves API

Crea un archivo `.env` en la raíz del proyecto con estas claves:

```
GOOGLE_API_KEY=your_google_api_key
OPENAI_API_KEY=your_openai_api_key
# Opcional: Añade esto si quieres usar modelos de OpenRouter
OPENROUTER_API_KEY=your_openrouter_api_key
```

#### Paso 2: Iniciar el Backend

```bash
# Instalar dependencias de Python
python -m pip install poetry==2.0.1 && poetry install -C api

# Iniciar el servidor API
python -m api.main
```

#### Paso 3: Iniciar el Frontend

```bash
# Instalar dependencias de JavaScript
npm install
# o
yarn install

# Iniciar la aplicación web
npm run dev
# o
yarn dev
```

#### Paso 4: ¡Usar DeepWiki!

1. Abre [http://localhost:3000](http://localhost:3000) en tu navegador
2. Ingresa un repositorio de GitHub, GitLab o Bitbucket (como `https://github.com/openai/codex`, `https://github.com/microsoft/autogen`, `https://gitlab.com/gitlab-org/gitlab`, o `https://bitbucket.org/redradish/atlassian_app_versions`)
3. Para repositorios privados, haz clic en "+ Agregar tokens de acceso" e ingresa tu token de acceso personal de GitHub o GitLab
4. ¡Haz clic en "Generar Wiki" y observa la magia suceder!

## 🔍 Cómo Funciona

DeepWiki usa IA para:

1. Clonar y analizar el repositorio de GitHub, GitLab o Bitbucket (incluyendo repos privados con autenticación por token)
2. Crear embeddings del código para recuperación inteligente
3. Generar documentación con IA consciente del contexto (usando modelos de Google Gemini, OpenAI, OpenRouter o Ollama local)
4. Crear diagramas visuales para explicar las relaciones del código
5. Organizar todo en una wiki estructurada
6. Habilitar preguntas y respuestas inteligentes con el repositorio a través de la función de Preguntas
7. Proporcionar capacidades de investigación en profundidad con Investigación Profunda

```mermaid
graph TD
    A[Usuario ingresa repo GitHub/GitLab/Bitbucket] --> AA{¿Repo privado?}
    AA -->|Sí| AB[Agregar token de acceso]
    AA -->|No| B[Clonar Repositorio]
    AB --> B
    B --> C[Analizar Estructura del Código]
    C --> D[Crear Embeddings del Código]

    D --> M{Seleccionar Proveedor de Modelo}
    M -->|Google Gemini| E1[Generar con Gemini]
    M -->|OpenAI| E2[Generar con OpenAI]
    M -->|OpenRouter| E3[Generar con OpenRouter]
    M -->|Ollama Local| E4[Generar con Ollama]

    E1 --> E[Generar Documentación]
    E2 --> E
    E3 --> E
    E4 --> E

    D --> F[Crear Diagramas Visuales]
    E --> G[Organizar como Wiki]
    F --> G
    G --> H[DeepWiki Interactiva]

    classDef process stroke-width:2px;
    classDef data stroke-width:2px;
    classDef result stroke-width:2px;
    classDef decision stroke-width:2px;

    class A,D data;
    class AA,M decision;
    class B,C,E,F,G,AB,E1,E2,E3,E4 process;
    class H result;
```

## 🛠️ Estructura del Proyecto

```
deepwiki/
├── api/                  # Servidor API backend
│   ├── main.py           # Punto de entrada de la API
│   ├── api.py            # Implementación FastAPI
│   ├── rag.py            # Generación Aumentada por Recuperación
│   ├── data_pipeline.py  # Utilidades de procesamiento de datos
│   └── pyproject.toml    # Dependencias Python (gestionadas con Poetry)
│
├── src/                  # App frontend Next.js
│   ├── app/              # Directorio app de Next.js
│   │   └── page.tsx      # Página principal de la aplicación
│   └── components/       # Componentes React
│       └── Mermaid.tsx   # Renderizador de diagramas Mermaid
│
├── public/               # Activos estáticos
├── package.json          # Dependencias JavaScript
└── .env                  # Variables de entorno (crear este archivo)
```

## 🤖 Sistema de Selección de Modelos Basado en Proveedores

DeepWiki ahora implementa un sistema flexible de selección de modelos basado en proveedores que soporta múltiples proveedores de LLM:

### Proveedores y Modelos Soportados

- **Google**: Predeterminado `gemini-2.5-flash`, también soporta `gemini-2.5-flash-lite`, `gemini-2.5-pro`, etc.
- **OpenAI**: Predeterminado `gpt-5-nano`, también soporta `gpt-5`, `gpt-4o`, etc.
- **OpenRouter**: Acceso a múltiples modelos a través de una API unificada, incluyendo Claude, Llama, Mistral, etc.
- **Ollama**: Soporte para modelos de código abierto ejecutados localmente como `llama3`

### Variables de Entorno

Cada proveedor requiere sus correspondientes variables de entorno para las claves API:

```
# Claves API
GOOGLE_API_KEY=tu_clave_api_google        # Requerida para modelos Google Gemini
OPENAI_API_KEY=tu_clave_api_openai        # Requerida para modelos OpenAI
OPENROUTER_API_KEY=tu_clave_api_openrouter # Requerida para modelos OpenRouter

# Configuración de URL Base de OpenAI API
OPENAI_BASE_URL=https://punto-final-personalizado.com/v1  # Opcional, para endpoints personalizados de OpenAI API

# Directorio de Configuración
DEEPWIKI_CONFIG_DIR=/ruta/a/directorio/config/personalizado  # Opcional, para ubicación personalizada de archivos de configuración
```

### Archivos de Configuración

DeepWiki utiliza archivos de configuración JSON para gestionar varios aspectos del sistema:

1. **`generator.json`**: Configuración para modelos de generación de texto
   - Define los proveedores de modelos disponibles (Google, OpenAI, OpenRouter, Ollama)
   - Especifica los modelos predeterminados y disponibles para cada proveedor
   - Contiene parámetros específicos de los modelos como temperatura y top_p

2. **`embedder.json`**: Configuración para modelos de embeddings y procesamiento de texto
   - Define modelos de embeddings para almacenamiento vectorial
   - Contiene configuración del recuperador para RAG
   - Especifica ajustes del divisor de texto para fragmentación de documentos

3. **`repo.json`**: Configuración para manejo de repositorios
   - Contiene filtros de archivos para excluir ciertos archivos y directorios
   - Define límites de tamaño de repositorio y reglas de procesamiento

Por defecto, estos archivos se encuentran en el directorio `api/config/`. Puedes personalizar su ubicación usando la variable de entorno `DEEPWIKI_CONFIG_DIR`.

### Selección de Modelos Personalizados para Proveedores de Servicios

La función de selección de modelos personalizados está diseñada específicamente para proveedores de servicios que necesitan:

- Ofrecer a los usuarios dentro de su organización una selección de diferentes modelos de IA
- Adaptarse rápidamente al panorama de LLM en rápida evolución sin cambios de código
- Soportar modelos especializados o ajustados que no están en la lista predefinida

Los proveedores de servicios pueden implementar sus ofertas de modelos seleccionando entre las opciones predefinidas o ingresando identificadores de modelos personalizados en la interfaz frontend.

### Configuración de URL Base para Canales Privados Empresariales

La configuración de base_url del Cliente OpenAI está diseñada principalmente para usuarios empresariales con canales API privados. Esta función:

- Permite la conexión a endpoints API privados o específicos de la empresa
- Permite a las organizaciones usar sus propios servicios LLM auto-alojados o desplegados a medida
- Soporta integración con servicios de terceros compatibles con la API de OpenAI

**Próximamente**: En futuras actualizaciones, DeepWiki soportará un modo donde los usuarios deberán proporcionar sus propias claves API en las solicitudes. Esto permitirá a los clientes empresariales con canales privados utilizar sus disposiciones API existentes sin compartir credenciales con el despliegue de DeepWiki.

## 🧩 Uso de modelos de embedding compatibles con OpenAI (por ejemplo, Alibaba Qwen)

Si deseas usar modelos de embedding compatibles con la API de OpenAI (como Alibaba Qwen), sigue estos pasos:

1. Sustituye el contenido de `api/config/embedder.json` por el de `api/config/embedder.openai_compatible.json.bak`.
2. En el archivo `.env` de la raíz del proyecto, configura las variables de entorno necesarias, por ejemplo:
   ```
   OPENAI_API_KEY=tu_api_key
   OPENAI_BASE_URL=tu_endpoint_compatible_openai
   ```
3. El programa sustituirá automáticamente los placeholders de embedder.json por los valores de tus variables de entorno.

Así puedes cambiar fácilmente a cualquier servicio de embedding compatible con OpenAI sin modificar el código.

## 🤖 Funciones de Preguntas e Investigación Profunda

### Función de Preguntas

La función de Preguntas te permite chatear con tu repositorio usando Generación Aumentada por Recuperación (RAG):

- **Respuestas Conscientes del Contexto**: Obtén respuestas precisas basadas en el código real de tu repositorio
- **Potenciada por RAG**: El sistema recupera fragmentos de código relevantes para proporcionar respuestas fundamentadas
- **Transmisión en Tiempo Real**: Ve las respuestas mientras se generan para una experiencia más interactiva
- **Historial de Conversación**: El sistema mantiene el contexto entre preguntas para interacciones más coherentes

### Función de Investigación Profunda

Investigación Profunda lleva el análisis de repositorios al siguiente nivel con un proceso de investigación de múltiples turnos:

- **Investigación en Profundidad**: Explora a fondo temas complejos a través de múltiples iteraciones de investigación
- **Proceso Estructurado**: Sigue un plan de investigación claro con actualizaciones y una conclusión completa
- **Continuación Automática**: La IA continúa automáticamente la investigación hasta llegar a una conclusión (hasta 5 iteraciones)
- **Etapas de Investigación**:
  1. **Plan de Investigación**: Describe el enfoque y los hallazgos iniciales
  2. **Actualizaciones de Investigación**: Desarrolla las iteraciones anteriores con nuevas perspectivas
  3. **Conclusión Final**: Proporciona una respuesta completa basada en todas las iteraciones

Para usar Investigación Profunda, simplemente activa el interruptor "Investigación Profunda" en la interfaz de Preguntas antes de enviar tu pregunta.

## 📱 Capturas de Pantalla

![Interfaz Principal de DeepWiki](screenshots/Interface.png)
*La interfaz principal de DeepWiki*

![Soporte para Repositorios Privados](screenshots/privaterepo.png)
*Acceso a repositorios privados con tokens de acceso personal*

![Función de Investigación Profunda](screenshots/DeepResearch.png)
*Investigación Profunda realiza investigaciones de múltiples turnos para temas complejos*

### Video de Demostración

[![Video de Demostración de DeepWiki](https://img.youtube.com/vi/zGANs8US8B4/0.jpg)](https://youtu.be/zGANs8US8B4)

*¡Mira DeepWiki en acción!*

## ❓ Solución de Problemas

### Problemas con Claves API
- **"Faltan variables de entorno"**: Asegúrate de que tu archivo `.env` esté en la raíz del proyecto y contenga las claves API requeridas
- **"Clave API no válida"**: Verifica que hayas copiado la clave completa correctamente sin espacios adicionales
- **"Error de API OpenRouter"**: Verifica que tu clave API de OpenRouter sea válida y tenga créditos suficientes

### Problemas de Conexión
- **"No se puede conectar al servidor API"**: Asegúrate de que el servidor API esté ejecutándose en el puerto 8001
- **"Error CORS"**: La API está configurada para permitir todos los orígenes, pero si tienes problemas, intenta ejecutar tanto el frontend como el backend en la misma máquina

### Problemas de Generación
- **"Error al generar wiki"**: Para repositorios muy grandes, prueba primero con uno más pequeño
- **"Formato de repositorio no válido"**: Asegúrate de usar un formato de URL válido para GitHub, GitLab o Bitbucket
- **"No se pudo obtener la estructura del repositorio"**: Para repositorios privados, asegúrate de haber ingresado un token de acceso personal válido con los permisos apropiados
- **"Error de renderizado de diagrama"**: La aplicación intentará automáticamente arreglar los diagramas rotos

### Soluciones Comunes
1. **Reiniciar ambos servidores**: A veces un simple reinicio soluciona la mayoría de los problemas
2. **Revisar los registros de la consola**: Abre las herramientas de desarrollo del navegador para ver cualquier error de JavaScript
3. **Revisar los registros de la API**: Mira la terminal donde se ejecuta la API para ver errores de Python

## 🤝 Contribuir

¡Las contribuciones son bienvenidas! Siéntete libre de:
- Abrir issues para bugs o solicitudes de funciones
- Enviar pull requests para mejorar el código
- Compartir tus comentarios e ideas

## 📄 Licencia

Este proyecto está licenciado bajo la Licencia MIT - consulta el archivo [LICENSE](LICENSE) para más detalles.

## ⭐ Historial de Estrellas

[![Gráfico de Historial de Estrellas](https://api.star-history.com/svg?repos=AsyncFuncAI/deepwiki-open&type=Date)](https://star-history.com/#AsyncFuncAI/deepwiki-open&Date)


================================================
FILE: README.fr.md
================================================

# DeepWiki-Open

![Bannière DeepWiki](screenshots/Deepwiki.png)

**DeepWiki** est ma propre tentative d’implémentation de DeepWiki, un outil qui crée automatiquement des wikis magnifiques et interactifs pour n’importe quel dépôt GitHub, GitLab ou Bitbucket ! Il suffit d’entrer un nom de dépôt, et DeepWiki :

1. Analyse la structure du code  
2. Génère une documentation complète  
3. Crée des diagrammes visuels pour expliquer le fonctionnement  
4. Organise le tout dans un wiki facile à naviguer

[!["Buy Me A Coffee"](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)](https://buymeacoffee.com/sheing)
[![Tip in Crypto](https://tip.md/badge.svg)](https://tip.md/sng-asyncfunc)
[![Twitter/X](https://img.shields.io/badge/Twitter-1DA1F2?style=for-the-badge&logo=twitter&logoColor=white)](https://x.com/sashimikun_void)
[![Discord](https://img.shields.io/badge/Discord-7289DA?style=for-the-badge&logo=discord&logoColor=white)](https://discord.com/invite/VQMBGR8u5v)

[English](./README.md) | [简体中文](./README.zh.md) | [繁體中文](./README.zh-tw.md) | [日本語](./README.ja.md) | [Español](./README.es.md) | [한국어](./README.kr.md) | [Tiếng Việt](./README.vi.md) | [Português Brasileiro](./README.pt-br.md) | [Français](./README.fr.md) | [Русский](./README.ru.md)

## ✨ Fonctionnalités

- **Documentation instantanée** : Transforme un dépôt GitHub, GitLab ou Bitbucket en wiki en quelques secondes
- **Support des dépôts privés** : Accès sécurisé avec jetons d’accès personnels
- **Analyse intelligente** : Compréhension de la structure et des relations du code via l’IA
- **Diagrammes élégants** : Diagrammes Mermaid automatiques pour visualiser l’architecture et les flux de données
- **Navigation facile** : Interface simple et intuitive
- **Fonction “Ask”** : Posez des questions à votre dépôt avec une IA alimentée par RAG
- **DeepResearch** : Processus de recherche multi-étapes pour explorer des sujets complexes
- **Multiples fournisseurs de modèles IA** : Prise en charge de Google Gemini, OpenAI, OpenRouter, et Ollama local

## 🚀 Démarrage rapide (super facile !)

### Option 1 : Avec Docker

```bash
# Cloner le dépôt
git clone https://github.com/AsyncFuncAI/deepwiki-open.git
cd deepwiki-open

# Créer un fichier .env avec vos clés API
echo "GOOGLE_API_KEY=votre_clé_google" > .env
echo "OPENAI_API_KEY=votre_clé_openai" >> .env
# Facultatif : clé OpenRouter
echo "OPENROUTER_API_KEY=votre_clé_openrouter" >> .env
# Facultatif : hôte personnalisé Ollama
echo "OLLAMA_HOST=votre_hote_ollama" >> .env
# Facultatif : Azure OpenAI
echo "AZURE_OPENAI_API_KEY=votre_clé_azure" >> .env
echo "AZURE_OPENAI_ENDPOINT=votre_endpoint" >> .env
echo "AZURE_OPENAI_VERSION=version_api" >> .env

# Lancer avec Docker Compose
docker-compose up
```

Pour des instructions détaillées sur l’utilisation de DeepWiki avec Ollama et Docker, consultez [Ollama Instructions](Ollama-instruction.md).

> 💡 **Où obtenir ces clés :**
> - Obtenez une clé API Google depuis [Google AI Studio](https://makersuite.google.com/app/apikey)
> - Obtenez une clé API OpenAI depuis [OpenAI Platform](https://platform.openai.com/api-keys)
> - Obtenez les identifiants Azure OpenAI depuis [Azure Portal](https://portal.azure.com/) – créez une ressource Azure OpenAI et récupérez la clé API, l’endpoint et la version de l’API

### Option 2 : Installation manuelle (Recommandée)

#### Étape 1 : Configurez vos clés API

Créez un fichier `.env` à la racine du projet avec ces clés :
```
GOOGLE_API_KEY=votre_clé_google
OPENAI_API_KEY=votre_clé_openai
# Optionnel : Ajoutez ceci pour utiliser des modèles OpenRouter
OPENROUTER_API_KEY=votre_clé_openrouter
# Optionnel : Ajoutez ceci pour utiliser des modèles Azure OpenAI
AZURE_OPENAI_API_KEY=votre_clé_azure_openai
AZURE_OPENAI_ENDPOINT=votre_endpoint_azure_openai
AZURE_OPENAI_VERSION=votre_version_azure_openai
# Optionnel : Ajoutez un hôte Ollama distant s'il n'est pas local. Par défaut : http://localhost:11434
OLLAMA_HOST=votre_hote_ollama
```

#### Étape 2 : Démarrer le Backend

```bash
# Installer dépendances Python
python -m pip install poetry==2.0.1 && poetry install -C api

# Démarrer le serveur API
python -m api.main
```

#### Étape 3 : Démarrer le Frontend

```bash
# Installer les dépendances JavaScript
npm install
# ou
yarn install

# Démarrer le serveur web
npm run dev
# ou
yarn dev
```

#### Étape 4 : Utiliser DeepWiki !

1. Ouvrir [http://localhost:3000](http://localhost:3000) dans votre navigateur
2. Entrer l'adresse d'un dépôt GitHub, GitLab ou Bitbucket (comme `https://github.com/openai/codex`, `https://github.com/microsoft/autogen`, `https://gitlab.com/gitlab-org/gitlab`, ou `https://bitbucket.org/redradish/atlassian_app_versions`)
3. Pour les dépôts privés, cliquez sur "+ Ajouter un jeton d'accès" et entrez votre jeton d’accès personnel GitHub ou GitLab.
4. Cliquez sur "Générer le Wiki" et regardez la magie opérer !

## 🔍 Comment ça marche

DeepWiki utilise l'IA pour :

1. Cloner et analyser le dépôt GitHub, GitLab ou Bitbucket (y compris les dépôts privés avec authentification par jeton d'accès)
2. Créer des embeddings du code pour une récupération intelligente
3. Générer de la documentation avec une IA sensible au contexte (en utilisant les modèles Google Gemini, OpenAI, OpenRouter, Azure OpenAI ou Ollama local)
4. Créer des diagrammes visuels pour expliquer les relations du code
5. Organiser le tout dans un wiki structuré
6. Permettre des questions-réponses intelligentes avec le dépôt grâce à la fonctionnalité Ask
7. Fournir des capacités de recherche approfondie avec DeepResearch

```mermaid
graph TD
    A[Utilisateur entre un dépôt GitHub/GitLab/Bitbucket] --> AA{Dépôt privé?}
    AA -->|Oui| AB[Ajouter un jeton d'accès]
    AA -->|Non| B[Cloner le dépôt]
    AB --> B
    B --> C[Analyser la structure du code]
    C --> D[Créer des Embeddings]

    D --> M{Sélectionner le modèle}
    M -->|Google Gemini| E1[Générer avec Gemini]
    M -->|OpenAI| E2[Générer avec OpenAI]
    M -->|OpenRouter| E3[Générer avec OpenRouter]
    M -->|Local Ollama| E4[Générer avec Ollama]
    M -->|Azure| E5[Générer avec Azure]

    E1 --> E[Générer la documentation]
    E2 --> E
    E3 --> E
    E4 --> E
    E5 --> E

    D --> F[Créer des diagrammes]
    E --> G[Organiser en Wiki]
    F --> G
    G --> H[DeepWiki interactif]

    classDef process stroke-width:2px;
    classDef data stroke-width:2px;
    classDef result stroke-width:2px;
    classDef decision stroke-width:2px;

    class A,D data;
    class AA,M decision;
    class B,C,E,F,G,AB,E1,E2,E3,E4,E5 process;
    class H result;
```

## 🛠️ Structure du Projet

```
deepwiki/
├── api/                  # Serveur API Backend
│   ├── main.py           # Point d'entrée de l'API
│   ├── api.py            # Implémentation FastAPI
│   ├── rag.py            # Génération Augmentée par Récupération (RAG)
│   ├── data_pipeline.py  # Utilitaires de traitement des données
│   └── pyproject.toml    # Dépendances Python (gérées avec Poetry)
│
├── src/                  # Application Frontend Next.js
│   ├── app/              # Répertoire de l'application Next.js
│   │   └── page.tsx      # Page principale de l'application
│   └── components/       # Composants React
│       └── Mermaid.tsx   # Rendu des diagrammes Mermaid
│
├── public/               # Ressources statiques
├── package.json          # Dépendances JavaScript
└── .env                  # Variables d'environnement (à créer)
```

## 🤖 Système de sélection de modèles

DeepWiki implémente désormais un système de sélection de modèles flexible, qui prend en charge plusieurs fournisseurs de LLM :

### Fournisseurs et modèles pris en charge

- **Google** : Par défaut `gemini-2.5-flash`, prend également en charge `gemini-2.5-flash-lite`, `gemini-2.5-pro`, etc.
- **OpenAI** : Par défaut `gpt-5-nano`, prend également en charge `gpt-5`, `gpt-4o`, etc.
- **OpenRouter** : Accès à plusieurs modèles via une API unifiée, notamment Claude, Llama, Mistral, etc.
- **Azure OpenAI** : Par défaut `gpt-4o`, prend également en charge `o4-mini`, etc.
- **Ollama** : Prise en charge des modèles open source exécutés localement, tels que `llama3`.

### Variables d'environnement

Chaque fournisseur requiert les variables d'environnement de clé API correspondantes :

```
# API Keys
GOOGLE_API_KEY=votre_clé_google        # Requis pour les modèles Google Gemini
OPENAI_API_KEY=votre_clé_openai        # Requis pour les modèles OpenAI
OPENROUTER_API_KEY=votre_clé_openrouter # Requis pour les modèles OpenRouter
AZURE_OPENAI_API_KEY=votre_clé_azure_openai  # Requis pour les modèles Azure OpenAI
AZURE_OPENAI_ENDPOINT=votre_endpoint_azure_openai  # Requis pour les modèles Azure OpenAI
AZURE_OPENAI_VERSION=votre_version_azure_openai  # Requis pour les modèles Azure OpenAI

# Configuration d'un endpoint OpenAI API personnalisé
OPENAI_BASE_URL=https://custom-api-endpoint.com/v1  # Optionnel, pour les endpoints API OpenAI personnalisés

# Hôte Ollama personnalisé
OLLAMA_HOST=votre_hôte_ollama # Optionnel, si Ollama n'est pas local. Par défaut : http://localhost:11434

# Répertoire de configuration
DEEPWIKI_CONFIG_DIR=/chemin/vers/dossier/de/configuration  # Optionnel, pour personnaliser le répertoire de stockage de la configuration
```

### Fichiers de Configuration

DeepWiki utilise des fichiers de configuration JSON pour gérer différents aspects du système :

1. **`generator.json`** : Configuration des modèles de génération de texte
   - Définit les fournisseurs de modèles disponibles (Google, OpenAI, OpenRouter, Azure, Ollama)
   - Spécifie les modèles par défaut et disponibles pour chaque fournisseur
   - Contient des paramètres spécifiques aux modèles tels que la température et top_p

2. **`embedder.json`** : Configuration des modèles d'embedding et du traitement de texte
   - Définit les modèles d'embedding pour le stockage vectoriel
   - Contient la configuration du retriever pour RAG
   - Spécifie les paramètres du séparateur de texte pour le chunking de documents

3. **`repo.json`** : Configuration de la gestion des dépôts
   - Contient des filtres de fichiers pour exclure certains fichiers et répertoires
   - Définit les limites de taille des dépôts et les règles de traitement

Par défaut, ces fichiers sont situés dans le répertoire `api/config/`. Vous pouvez personnaliser leur emplacement à l'aide de la variable d'environnement `DEEPWIKI_CONFIG_DIR`.

### Sélection de Modèles Personnalisés pour les Fournisseurs de Services

La fonctionnalité de sélection de modèles personnalisés est spécialement conçue pour les fournisseurs de services qui ont besoin de :

- Offrir plusieurs choix de modèles d'IA aux utilisateurs au sein de leur organisation
- S'adapter rapidement à l'évolution rapide du paysage des LLM sans modifications de code
- Prendre en charge des modèles spécialisés ou affinés qui ne figurent pas dans la liste prédéfinie

Les fournisseurs de services peuvent implémenter leurs offres de modèles en sélectionnant parmi les options prédéfinies ou en entrant des identifiants de modèles personnalisés dans l'interface utilisateur.

### Configuration de l'URL de base pour les canaux privés d'entreprise

La configuration `base_url` du client OpenAI est principalement conçue pour les utilisateurs d'entreprise disposant de canaux API privés. Cette fonctionnalité :

- Permet la connexion à des points de terminaison API privés ou spécifiques à l'entreprise.
- Permet aux organisations d'utiliser leurs propres services LLM auto-hébergés ou déployés sur mesure.
- Prend en charge l'intégration avec des services tiers compatibles avec l'API OpenAI.

**Bientôt disponible** : Dans les prochaines mises à jour, DeepWiki prendra en charge un mode où les utilisateurs devront fournir leurs propres clés API dans les requêtes. Cela permettra aux entreprises clientes disposant de canaux privés d'utiliser leurs accords API existants sans partager leurs informations d'identification avec le déploiement DeepWiki.

## 🧩 Utilisation de modèles d'embedding compatibles avec OpenAI (par exemple, Alibaba Qwen)

Si vous souhaitez utiliser des modèles d'embedding compatibles avec l'API OpenAI (comme Alibaba Qwen), suivez ces étapes :

1. Remplacez le contenu de `api/config/embedder.json` par celui de `api/config/embedder.openai_compatible.json.bak`.
2. Dans votre fichier `.env` à la racine du projet, définissez les variables d'environnement appropriées, par exemple :
   ```
   OPENAI_API_KEY=votre_clé_api
   OPENAI_BASE_URL=votre_endpoint_compatible_openai
   ```
3. Le programme substituera automatiquement les espaces réservés dans `embedder.json` avec les valeurs de vos variables d'environnement.

Cela vous permet de passer facilement à n'importe quel service d'embedding compatible avec OpenAI sans modifications de code.

### Journalisation (Logging)

DeepWiki utilise le module `logging` intégré de Python pour la sortie de diagnostics. Vous pouvez configurer la verbosité et la destination du fichier journal via des variables d'environnement :

| Variable        | Description                                                               | Valeur par défaut             |
|-----------------|---------------------------------------------------------------------------|------------------------------|
| `LOG_LEVEL`     | Niveau de journalisation (DEBUG, INFO, WARNING, ERROR, CRITICAL).         | INFO                         |
| `LOG_FILE_PATH` | Chemin vers le fichier journal. Si défini, les journaux y seront écrits.  | `api/logs/application.log`   |

Pour activer la journalisation de débogage et diriger les journaux vers un fichier personnalisé :
```bash
export LOG_LEVEL=DEBUG
export LOG_FILE_PATH=./debug.log
python -m api.main
```
Ou avec Docker Compose :
```bash
LOG_LEVEL=DEBUG LOG_FILE_PATH=./debug.log docker-compose up
```

Lors de l'exécution avec Docker Compose, le répertoire `api/logs` du conteneur est lié à `./api/logs` sur votre hôte (voir la section `volumes` dans `docker-compose.yml`), ce qui garantit que les fichiers journaux persistent lors des redémarrages.

Vous pouvez également stocker ces paramètres dans votre fichier `.env` :

```bash
LOG_LEVEL=DEBUG
LOG_FILE_PATH=./debug.log
```
Puis exécutez simplement :

```bash
docker-compose up
```

**Considérations de sécurité concernant le chemin des journaux :** Dans les environnements de production, assurez-vous que le répertoire `api/logs` et tout chemin de fichier journal personnalisé sont sécurisés avec des permissions de système de fichiers et des contrôles d'accès appropriés. L'application s'assure que `LOG_FILE_PATH` se trouve dans le répertoire `api/logs` du projet afin d'empêcher le parcours de chemin ou les écritures non autorisées.

## 🛠️ Configuration Avancée

### Variables d'environnement

| Variable                | Description                                                     | Requis     | Note                                                                                                     |
|-------------------------|-----------------------------------------------------------------|------------|----------------------------------------------------------------------------------------------------------|
| `GOOGLE_API_KEY`        | Clé API Google Gemini pour la génération                        | Non        | Requis uniquement si vous souhaitez utiliser les modèles Google Gemini                                   |
| `OPENAI_API_KEY`        | Clé API OpenAI pour les embeddings et la génération              | Oui        | Remarque : Ceci est requis même si vous n'utilisez pas les modèles OpenAI, car elle est utilisée pour les embeddings. |
| `OPENROUTER_API_KEY`    | Clé API OpenRouter pour les modèles alternatifs                 | Non        | Requis uniquement si vous souhaitez utiliser les modèles OpenRouter                                      |
| `AZURE_OPENAI_API_KEY`  | Clé API Azure OpenAI                                            | Non        | Requis uniquement si vous souhaitez utiliser les modèles Azure OpenAI                                    |
| `AZURE_OPENAI_ENDPOINT` | Point de terminaison Azure OpenAI                               | Non        | Requis uniquement si vous souhaitez utiliser les modèles Azure OpenAI                                    |
| `AZURE_OPENAI_VERSION`  | Version Azure OpenAI                                            | Non        | Requis uniquement si vous souhaitez utiliser les modèles Azure OpenAI                                    |
| `OLLAMA_HOST`           | Hôte Ollama (par défaut : http://localhost:11434)               | Non        | Requis uniquement si vous souhaitez utiliser un serveur Ollama externe                                   |
| `PORT`                  | Port du serveur API (par défaut : 8001)                         | Non        | Si vous hébergez l'API et le frontend sur la même machine, assurez-vous de modifier le port de `SERVER_BASE_URL` en conséquence |
| `SERVER_BASE_URL`       | URL de base du serveur API (par défaut : http://localhost:8001) | Non        |                                                                                                           |
| `DEEPWIKI_AUTH_MODE`    | Définir sur `true` ou `1` pour activer le mode verrouillé        | Non        | La valeur par défaut est `false`. Si activé, `DEEPWIKI_AUTH_CODE` est requis.                             |
| `DEEPWIKI_AUTH_CODE`    | Le code requis pour la génération de wiki lorsque `DEEPWIKI_AUTH_MODE` est activé. | Non        | Utilisé uniquement si `DEEPWIKI_AUTH_MODE` est `true` ou `1`.                          |

Si vous n'utilisez pas le mode Ollama, vous devez configurer une clé API OpenAI pour les embeddings. Les autres clés API ne sont requises que si vous configurez et utilisez des modèles des fournisseurs correspondants.

## Mode verrouillé

DeepWiki peut être configuré pour fonctionner en mode verrouillé, où la génération de wiki nécessite un code d'autorisation valide. Ceci est utile si vous souhaitez contrôler qui peut utiliser la fonctionnalité de génération.
Ce mode restreint l'initialisation depuis le frontend et protège la suppression du cache, mais n'empêche pas complètement la génération côté backend si les points de terminaison de l'API sont appelés directement.

Pour activer le mode verrouillé, définissez les variables d'environnement suivantes :

- `DEEPWIKI_AUTH_MODE` : définissez cette variable sur `true` ou `1`. Une fois activée, l'interface affichera un champ de saisie pour le code d'autorisation.
- `DEEPWIKI_AUTH_CODE` : définissez cette variable sur le code secret souhaité. Restreint l'initialisation du frontend et protège la suppression du cache, mais n'empêche pas complètement la génération backend si les points de terminaison de l'API sont atteints directement.

Si `DEEPWIKI_AUTH_MODE` n'est pas défini ou est défini sur `false` (ou toute autre valeur que `true`/`1`), la fonctionnalité d'autorisation sera désactivée et aucun code ne sera requis.

### Configuration Docker

Vous pouvez utiliser Docker pour exécuter DeepWiki :

#### Exécution du conteneur

```bash
# Récupérer l'image depuis GitHub Container Registry
docker pull ghcr.io/asyncfuncai/deepwiki-open:latest

# Exécuter le conteneur avec les variables d'environnement
docker run -p 8001:8001 -p 3000:3000 \
  -e GOOGLE_API_KEY=votre_clé_google \
  -e OPENAI_API_KEY=votre_clé_openai \
  -e OPENROUTER_API_KEY=votre_clé_openrouter \
  -e OLLAMA_HOST=votre_hôte_ollama \
  -e AZURE_OPENAI_API_KEY=votre_clé_azure_openai \
  -e AZURE_OPENAI_ENDPOINT=votre_endpoint_azure_openai \
  -e AZURE_OPENAI_VERSION=votre_version_azure_openai \
  -v ~/.adalflow:/root/.adalflow \
  ghcr.io/asyncfuncai/deepwiki-open:latest
```

Cette commande monte également `~/.adalflow` de votre hôte vers `/root/.adalflow` dans le conteneur. Ce chemin est utilisé pour stocker :
- Les dépôts clonés (`~/.adalflow/repos/`)
- Leurs embeddings et index (`~/.adalflow/databases/`)
- Le contenu wiki généré mis en cache (`~/.adalflow/wikicache/`)

Cela garantit que vos données persistent même si le conteneur est arrêté ou supprimé.

Vous pouvez également utiliser le fichier `docker-compose.yml` fourni :

```bash
# Modifiez d'abord le fichier .env avec vos clés API
docker-compose up
```

(Le fichier `docker-compose.yml` est préconfiguré pour monter `~/.adalflow` pour la persistance des données, de manière similaire à la commande `docker run` ci-dessus.)

#### Utilisation d'un fichier .env avec Docker

Vous pouvez également monter un fichier `.env` dans le conteneur :

```bash
# Créer un fichier .env avec vos clés API
echo "GOOGLE_API_KEY=votre_clé_google" > .env
echo "OPENAI_API_KEY=votre_clé_openai" >> .env
echo "OPENROUTER_API_KEY=votre_clé_openrouter" >> .env
echo "AZURE_OPENAI_API_KEY=votre_clé_azure_openai" >> .env
echo "AZURE_OPENAI_ENDPOINT=votre_endpoint_azure_openai" >> .env
echo "AZURE_OPENAI_VERSION=votre_version_azure_openai"  >> .env
echo "OLLAMA_HOST=votre_hôte_ollama" >> .env

# Exécuter le conteneur avec le fichier .env monté
docker run -p 8001:8001 -p 3000:3000 \
  -v $(pwd)/.env:/app/.env \
  -v ~/.adalflow:/root/.adalflow \
  ghcr.io/asyncfuncai/deepwiki-open:latest
```

Cette commande monte également `~/.adalflow` de votre hôte vers `/root/.adalflow` dans le conteneur. Ce chemin est utilisé pour stocker :
- Les dépôts clonés (`~/.adalflow/repos/`)
- Leurs embeddings et index (`~/.adalflow/databases/`)
- Le contenu wiki généré mis en cache (`~/.adalflow/wikicache/`)

Cela garantit que vos données persistent même si le conteneur est arrêté ou supprimé.

#### Construction de l'image Docker localement

Si vous souhaitez construire l'image Docker localement :

```bash
# Cloner le dépôt
git clone https://github.com/AsyncFuncAI/deepwiki-open.git
cd deepwiki-open

# Construire l'image Docker
docker build -t deepwiki-open .

# Exécuter le conteneur
docker run -p 8001:8001 -p 3000:3000 \
  -e GOOGLE_API_KEY=votre_clé_google \
  -e OPENAI_API_KEY=votre_clé_openai \
  -e OPENROUTER_API_KEY=votre_clé_openrouter \
  -e AZURE_OPENAI_API_KEY=votre_clé_azure_openai \
  -e AZURE_OPENAI_ENDPOINT=votre_endpoint_azure_openai \
  -e AZURE_OPENAI_VERSION=votre_version_azure_openai \
  -e OLLAMA_HOST=votre_hôte_ollama \
  deepwiki-open
```

#### Utilisation de certificats auto-signés dans Docker

Si vous êtes dans un environnement qui utilise des certificats auto-signés, vous pouvez les inclure dans la construction de l'image Docker :

1. Créez un répertoire pour vos certificats (le répertoire par défaut est `certs` à la racine de votre projet)
2. Copiez vos fichiers de certificats `.crt` ou `.pem` dans ce répertoire
3. Construisez l'image Docker :

```bash
# Construire avec le répertoire de certificats par défaut (certs)
docker build .

# Ou construire avec un répertoire de certificats personnalisé
docker build --build-arg CUSTOM_CERT_DIR=my-custom-certs .
```

### Détails du serveur API

Le serveur API fournit :
- Clonage et indexation des dépôts
- RAG (Retrieval Augmented Generation - Génération augmentée par récupération)
- Complétion de chat en streaming

Pour plus de détails, consultez le [README de l’API](./api/README.md).

## 🔌 Intégration OpenRouter

DeepWiki prend désormais en charge [OpenRouter](https://openrouter.ai/) en tant que fournisseur de modèles, vous donnant accès à des centaines de modèles d'IA via une seule API :

- **Options de modèles multiples** : accédez aux modèles d'OpenAI, Anthropic, Google, Meta, Mistral, et plus encore
- **Configuration simple** : ajoutez simplement votre clé API OpenRouter et sélectionnez le modèle que vous souhaitez utiliser
- **Rentabilité** : choisissez des modèles qui correspondent à votre budget et à vos besoins en termes de performances
- **Commutation facile** : basculez entre différents modèles sans modifier votre code

### Comment utiliser OpenRouter avec DeepWiki

1. **Obtenez une clé API** : inscrivez-vous sur [OpenRouter](https://openrouter.ai/) et obtenez votre clé API
2. **Ajouter à l'environnement** : ajoutez `OPENROUTER_API_KEY=votre_clé` à votre fichier `.env`
3. **Activer dans l'interface utilisateur** : cochez l'option "Utiliser l'API OpenRouter" sur la page d'accueil
4. **Sélectionnez le modèle** : choisissez parmi les modèles populaires tels que GPT-4o, Claude 3.5 Sonnet, Gemini 2.0, et plus encore

OpenRouter est particulièrement utile si vous souhaitez :

- Essayer différents modèles sans vous inscrire à plusieurs services
- Accéder à des modèles qui pourraient être restreints dans votre région
- Comparer les performances entre différents fournisseurs de modèles
- Optimiser le rapport coût/performance en fonction de vos besoins

## 🤖 Fonctionnalités Ask & DeepResearch

### Fonctionnalité Ask

La fonctionnalité Ask vous permet de discuter avec votre dépôt en utilisant la génération augmentée par récupération (RAG) :

- **Réponses sensibles au contexte** : obtenez des réponses précises basées sur le code réel de votre dépôt
- **Alimenté par RAG** : le système récupère des extraits de code pertinents pour fournir des réponses fondées
- **Streaming en temps réel** : visualisez les réponses au fur et à mesure de leur génération pour une expérience plus interactive
- **Historique des conversations** : le système conserve le contexte entre les questions pour des interactions plus cohérentes

### Fonctionnalité DeepResearch

DeepResearch fait passer l'analyse de référentiel au niveau supérieur avec un processus de recherche en plusieurs étapes :

- **Enquête approfondie** : explore en profondeur des sujets complexes grâce à de multiples itérations de recherche
- **Processus structuré** : suit un plan de recherche clair avec des mises à jour et une conclusion complète
- **Continuation automatique** : l'IA poursuit automatiquement la recherche jusqu'à ce qu'elle atteigne une conclusion (jusqu'à 5 itérations)
- **Étapes de la recherche** :
  1. **Plan de recherche** : décrit l'approche et les premières conclusions
  2. **Mises à jour de la recherche** : s'appuie sur les itérations précédentes avec de nouvelles informations
  3. **Conclusion finale** : fournit une réponse complète basée sur toutes les itérations

Pour utiliser DeepResearch, activez simplement le commutateur "Deep Research" dans l'interface Ask avant de soumettre votre question.

## 📱 Captures d'écran

![Interface principale de DeepWiki](screenshots/Interface.png)
*L'interface principale de DeepWiki*

![Prise en charge des dépôts privés](screenshots/privaterepo.png)
*Accédez aux dépôts privés avec des jetons d'accès personnels*

![Fonctionnalité DeepResearch](screenshots/DeepResearch.png)
*DeepResearch effectue des recherches en plusieurs étapes pour des sujets complexes*

### Vidéo de démonstration

[![Vidéo de démo DeepWiki](https://img.youtube.com/vi/zGANs8US8B4/0.jpg)](https://youtu.be/zGANs8US8B4)

*Regardez DeepWiki en action !*

## ❓ Dépannage

### Problèmes de clé API

- **"Variables d'environnement manquantes"** : assurez-vous que votre fichier `.env` se trouve à la racine du projet et qu'il contient les clés API requises.
- **"Clé API non valide"** : vérifiez que vous avez correctement copié la clé complète, sans espaces supplémentaires.
- **"Erreur d'API OpenRouter"** : vérifiez que votre clé API OpenRouter est valide et qu'elle dispose de crédits suffisants.
- **"Erreur d'API Azure OpenAI"** : vérifiez que vos informations d'identification Azure OpenAI (clé API, point de terminaison et version) sont correctes et que le service est correctement déployé.

### Problèmes de connexion

- **"Impossible de se connecter au serveur API"** : assurez-vous que le serveur API est en cours d'exécution sur le port 8001.
- **"Erreur CORS"** : l'API est configurée pour autoriser toutes les origines, mais si vous rencontrez des problèmes, essayez d'exécuter le frontend et le backend sur la même machine.

### Problèmes de génération

- **"Erreur lors de la génération du wiki"** : pour les très grands référentiels, essayez d'abord un référentiel plus petit.
- **"Format de référentiel non valide"** : assurez-vous que vous utilisez un format d'URL GitHub, GitLab ou Bitbucket valide.
- **"Impossible de récupérer la structure du référentiel"** : pour les référentiels privés, assurez-vous d'avoir saisi un jeton d'accès personnel valide avec les autorisations appropriées.
- **"Erreur de rendu du diagramme"** : l'application essaiera automatiquement de corriger les diagrammes cassés.

### Solutions courantes

1. **Redémarrez les deux serveurs** : parfois, un simple redémarrage résout la plupart des problèmes.
2. **Vérifiez les journaux de la console** : ouvrez les outils de développement du navigateur pour voir les erreurs JavaScript.
3. **Vérifiez les journaux de l'API** : consultez le terminal où l'API est en cours d'exécution pour les erreurs Python.

## 🤝 Contribution

Les contributions sont les bienvenues ! N'hésitez pas à :
- Ouvrir des issues pour les bugs ou les demandes de fonctionnalités
- Soumettre des pull requests pour améliorer le code
- Partager vos commentaires et vos idées

## 📄 Licence

Projet sous licence MIT – Voir le fichier [LICENSE](LICENSE).

## ⭐ Historique des stars

[![Historique des stars](https://api.star-history.com/svg?repos=AsyncFuncAI/deepwiki-open&type=Date)](https://star-history.com/#AsyncFuncAI/deepwiki-open&Date)


================================================
FILE: README.ja.md
================================================
# DeepWiki-Open

![DeepWiki バナー](screenshots/Deepwiki.png)

**DeepWiki**は、GitHub、GitLab、または Bitbucket リポジトリのための美しくインタラクティブな Wiki を自動的に作成します！リポジトリ名を入力するだけで、DeepWiki は以下を行います：

1. コード構造を分析
2. 包括的なドキュメントを生成
3. すべての仕組みを説明する視覚的な図を作成
4. すべてを簡単に閲覧できる Wiki に整理

[!["Buy Me A Coffee"](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)](https://buymeacoffee.com/sheing)

[![Twitter/X](https://img.shields.io/badge/Twitter-1DA1F2?style=for-the-badge&logo=twitter&logoColor=white)](https://x.com/sashimikun_void)
[![Discord](https://img.shields.io/badge/Discord-7289DA?style=for-the-badge&logo=discord&logoColor=white)](https://discord.com/invite/VQMBGR8u5v)

[English](./README.md) | [简体中文](./README.zh.md) | [繁體中文](./README.zh-tw.md) | [日本語](./README.ja.md) | [Español](./README.es.md) | [한국어](./README.kr.md) | [Tiếng Việt](./README.vi.md) | [Português Brasileiro](./README.pt-br.md) | [Français](./README.fr.md) | [Русский](./README.ru.md)

## ✨ 特徴

- **即時ドキュメント生成**: あらゆる GitHub、GitLab、または Bitbucket リポジトリを数秒で Wiki に変換
- **プライベートリポジトリ対応**: 個人アクセストークンを使用してプライベートリポジトリに安全にアクセス
- **スマート分析**: AI を活用したコード構造と関係の理解
- **美しい図表**: アーキテクチャとデータフローを視覚化する自動 Mermaid 図
- **簡単なナビゲーション**: Wiki を探索するためのシンプルで直感的なインターフェース
- **質問機能**: RAG 搭載 AI を使用してリポジトリとチャットし、正確な回答を得る
- **詳細調査**: 複雑なトピックを徹底的に調査する多段階研究プロセス
- **複数のモデルプロバイダー**: Google Gemini、OpenAI、OpenRouter、およびローカル Ollama モデルのサポート

## 🚀 クイックスタート（超簡単！）

### オプション 1: Docker を使用

```bash
# リポジトリをクローン
git clone https://github.com/AsyncFuncAI/deepwiki-open.git
cd deepwiki-open

# APIキーを含む.envファイルを作成
echo "GOOGLE_API_KEY=your_google_api_key" > .env
echo "OPENAI_API_KEY=your_openai_api_key" >> .env
# オプション: OpenRouterモデルを使用する場合はOpenRouter APIキーを追加
echo "OPENROUTER_API_KEY=your_openrouter_api_key" >> .env

# Docker Composeで実行
docker-compose up
```

(上記の Docker コマンドおよび`docker-compose.yml`の設定では、ホスト上の`~/.adalflow`ディレクトリをコンテナ内の`/root/.adalflow`にマウントします。このパスは以下のものを保存するために使用されます：

- クローンされたリポジトリ (`~/.adalflow/repos/`)
- それらのエンベディングとインデックス (`~/.adalflow/databases/`)
- 生成された Wiki のキャッシュ (`~/.adalflow/wikicache/`)

これにより、コンテナが停止または削除されてもデータが永続化されます。)

> 💡 **これらのキーの入手先:**
>
> - Google API キーは[Google AI Studio](https://makersuite.google.com/app/apikey)から取得
> - OpenAI API キーは[OpenAI Platform](https://platform.openai.com/api-keys)から取得

### オプション 2: 手動セットアップ（推奨）

#### ステップ 1: API キーの設定

プロジェクトのルートに`.env`ファイルを作成し、以下のキーを追加します：

```
GOOGLE_API_KEY=your_google_api_key
OPENAI_API_KEY=your_openai_api_key
# オプション: OpenRouterモデルを使用する場合は追加
OPENROUTER_API_KEY=your_openrouter_api_key
```

#### ステップ 2: バックエンドの起動

```bash
# Pythonの依存関係をインストール
python -m pip install poetry==2.0.1 && poetry install -C api

# APIサーバーを起動
python -m api.main
```

#### ステップ 3: フロントエンドの起動

```bash
# JavaScript依存関係をインストール
npm install
# または
yarn install

# Webアプリを起動
npm run dev
# または
yarn dev
```

#### ステップ 4: DeepWiki を使用！

1. ブラウザで[http://localhost:3000](http://localhost:3000)を開く
2. GitHub、GitLab、または Bitbucket リポジトリを入力（例：`https://github.com/openai/codex`、`https://github.com/microsoft/autogen`、`https://gitlab.com/gitlab-org/gitlab`、または`https://bitbucket.org/redradish/atlassian_app_versions`）
3. プライベートリポジトリの場合は、「+ アクセストークンを追加」をクリックして GitHub または GitLab の個人アクセストークンを入力
4. 「Wiki を生成」をクリックして、魔法が起こるのを見守りましょう！

## 🔍 仕組み

DeepWiki は AI を使用して：

1. GitHub、GitLab、または Bitbucket リポジトリをクローンして分析（トークン認証によるプライベートリポジトリを含む）
2. スマート検索のためのコードの埋め込みを作成
3. コンテキスト対応 AI でドキュメントを生成（Google Gemini、OpenAI、OpenRouter、またはローカル Ollama モデルを使用）
4. コードの関係を説明する視覚的な図を作成
5. すべてを構造化された Wiki に整理
6. 質問機能を通じてリポジトリとのインテリジェントな Q&A を可能に
7. 詳細調査機能で深い研究能力を提供

```mermaid
graph TD
    A[ユーザーがGitHub/GitLab/Bitbucketリポジトリを入力] --> AA{プライベートリポジトリ?}
    AA -->|はい| AB[アクセストークンを追加]
    AA -->|いいえ| B[リポジトリをクローン]
    AB --> B
    B --> C[コード構造を分析]
    C --> D[コード埋め込みを作成]

    D --> M{モデルプロバイダーを選択}
    M -->|Google Gemini| E1[Geminiで生成]
    M -->|OpenAI| E2[OpenAIで生成]
    M -->|OpenRouter| E3[OpenRouterで生成]
    M -->|ローカルOllama| E4[Ollamaで生成]

    E1 --> E[ドキュメントを生成]
    E2 --> E
    E3 --> E
    E4 --> E

    D --> F[視覚的な図を作成]
    E --> G[Wikiとして整理]
    F --> G
    G --> H[インタラクティブなDeepWiki]

    classDef process stroke-width:2px;
    classDef data stroke-width:2px;
    classDef result stroke-width:2px;
    classDef decision stroke-width:2px;

    class A,D data;
    class AA,M decision;
    class B,C,E,F,G,AB,E1,E2,E3,E4 process;
    class H result;
```

## 🛠️ プロジェクト構造

```
deepwiki/
├── api/                  # バックエンドAPIサーバー
│   ├── main.py           # APIエントリーポイント
│   ├── api.py            # FastAPI実装
│   ├── rag.py            # 検索拡張生成
│   ├── data_pipeline.py  # データ処理ユーティリティ
│   └── pyproject.toml    # Python依存関係（Poetry）
│
├── src/                  # フロントエンドNext.jsアプリ
│   ├── app/              # Next.jsアプリディレクトリ
│   │   └── page.tsx      # メインアプリケーションページ
│   └── components/       # Reactコンポーネント
│       └── Mermaid.tsx   # Mermaid図レンダラー
│
├── public/               # 静的アセット
├── package.json          # JavaScript依存関係
└── .env                  # 環境変数（作成する必要あり）
```

## 🛠️ 高度な設定

### 環境変数

| 変数                          | 説明                                                            | 必須 | 注意                                                                                                          |
| ----------------------------- | --------------------------------------------------------------- | ---- | ------------------------------------------------------------------------------------------------------------- |
| `GOOGLE_API_KEY`              | AI 生成のための Google Gemini API キー                          | ◯    |                                                                                                               |
| `OPENAI_API_KEY`              | 埋め込みのための OpenAI API キー                                | ◯    |                                                                                                               |
| `OPENROUTER_API_KEY`          | 代替モデルのための OpenRouter API キー                          | ✗    | OpenRouter モデルを使用する場合にのみ必須です                                                                 |
| `PORT`                        | API サーバーのポート（デフォルト：8001）                        | ✗    | API とフロントエンドを同じマシンでホストする場合、`SERVER_BASE_URL`のポートを適宜変更してください             |
| `SERVER_BASE_URL`             | API サーバーのベース URL（デフォルト：`http://localhost:8001`） | ✗    |                                                                                                               |

### 設定ファイル

DeepWikiはシステムの様々な側面を管理するためにJSON設定ファイルを使用しています：

1. **`generator.json`**: テキスト生成モデルの設定
   - 利用可能なモデルプロバイダー（Google、OpenAI、OpenRouter、Ollama）を定義
   - 各プロバイダーのデフォルトおよび利用可能なモデルを指定
   - temperatureやtop_pなどのモデル固有のパラメータを含む

2. **`embedder.json`**: 埋め込みモデルとテキスト処理の設定
   - ベクトルストレージ用の埋め込みモデルを定義
   - RAG用の検索設定を含む
   - ドキュメントチャンク分割のためのテキスト分割設定を指定

3. **`repo.json`**: リポジトリ処理の設定
   - 特定のファイルやディレクトリを除外するファイルフィルターを含む
   - リポジトリサイズ制限と処理ルールを定義

デフォルトでは、これらのファイルは`api/config/`ディレクトリにあります。`DEEPWIKI_CONFIG_DIR`環境変数を使用して、その場所をカスタマイズできます。

### Docker セットアップ

Docker を使用して DeepWiki を実行できます：

```bash
# GitHub Container Registryからイメージをプル
docker pull ghcr.io/asyncfuncai/deepwiki-open:latest

# 環境変数を設定してコンテナを実行
docker run -p 8001:8001 -p 3000:3000 \
  -e GOOGLE_API_KEY=your_google_api_key \
  -e OPENAI_API_KEY=your_openai_api_key \
  -e OPENROUTER_API_KEY=your_openrouter_api_key \
  -v ~/.adalflow:/root/.adalflow \
  ghcr.io/asyncfuncai/deepwiki-open:latest
```

このコマンドは、ホスト上の `~/.adalflow` をコンテナ内の `/root/.adalflow` にマウントします。このパスは以下のものを保存するために使用されます：

- クローンされたリポジトリ (`~/.adalflow/repos/`)
- それらのエンベディングとインデックス (`~/.adalflow/databases/`)
- 生成された Wiki のキャッシュ (`~/.adalflow/wikicache/`)

これにより、コンテナが停止または削除されてもデータが永続化されます。
または、提供されている `docker-compose.yml` ファイルを使用します。

```bash
# まず.envファイルをAPIキーで編集
docker-compose up
```

（`docker-compose.yml` ファイルは、上記の `docker run` コマンドと同様に、データ永続化のために `~/.adalflow` をマウントするように事前設定されています。）

#### Docker で.env ファイルを使用する

.env ファイルをコンテナにマウントすることもできます：

```bash
# APIキーを含む.envファイルを作成
echo "GOOGLE_API_KEY=your_google_api_key" > .env
echo "OPENAI_API_KEY=your_openai_api_key" >> .env
echo "OPENROUTER_API_KEY=your_openrouter_api_key" >> .env

# .envファイルをマウントしてコンテナを実行
docker run -p 8001:8001 -p 3000:3000 \
  -v $(pwd)/.env:/app/.env \
  -v ~/.adalflow:/root/.adalflow \
  ghcr.io/asyncfuncai/deepwiki-open:latest
```

このコマンドは、ホスト上の `~/.adalflow` をコンテナ内の `/root/.adalflow` にマウントします。このパスは以下のものを保存するために使用されます：

- クローンされたリポジトリ (`~/.adalflow/repos/`)
- それらのエンベディングとインデックス (`~/.adalflow/databases/`)
- 生成された Wiki のキャッシュ (`~/.adalflow/wikicache/`)

これにより、コンテナが停止または削除されてもデータが永続化されます。

#### Docker イメージをローカルでビルドする

Docker イメージをローカルでビルドしたい場合：

```bash
# リポジトリをクローン
git clone https://github.com/AsyncFuncAI/deepwiki-open.git
cd deepwiki-open

# Dockerイメージをビルド
docker build -t deepwiki-open .

# コンテナを実行
docker run -p 8001:8001 -p 3000:3000 \
  -e GOOGLE_API_KEY=your_google_api_key \
  -e OPENAI_API_KEY=your_openai_api_key \
  -e OPENROUTER_API_KEY=your_openrouter_api_key \
  deepwiki-open
```

### API サーバー詳細

API サーバーは以下を提供します：

- リポジトリのクローンとインデックス作成
- RAG（Retrieval Augmented Generation：検索拡張生成）
- ストリーミングチャット補完

詳細については、[API README](./api/README.md) を参照してください。

## 🤖 プロバイダーベースのモデル選択システム

DeepWikiでは、複数のLLMプロバイダーをサポートする柔軟なプロバイダーベースのモデル選択システムを実装しています：

### サポートされているプロバイダーとモデル

- **Google**: デフォルトは `gemini-2.5-flash`、また `gemini-2.5-flash-lite`、`gemini-2.5-pro` などもサポート
- **OpenAI**: デフォルトは `gpt-5-nano`、また `gpt-5`、`gpt-4o` などもサポート
- **OpenRouter**: Claude、Llama、Mistralなど、統一APIを通じて複数のモデルにアクセス
- **Ollama**: `llama3` などのローカルで実行するオープンソースモデルをサポート

### 環境変数

各プロバイダーには、対応するAPI鍵の環境変数が必要です：

```
# API鍵
GOOGLE_API_KEY=あなたのGoogle API鍵        # Google Geminiモデルに必要
OPENAI_API_KEY=あなたのOpenAI鍵            # OpenAIモデルに必要
OPENROUTER_API_KEY=あなたのOpenRouter鍵    # OpenRouterモデルに必要

# OpenAI APIベースURL設定
OPENAI_BASE_URL=https://カスタムAPIエンドポイント.com/v1  # オプション、カスタムOpenAI APIエンドポイント用
```

### サービスプロバイダー向けのカスタムモデル選択

カスタムモデル選択機能は、あなたの組織のユーザーに様々なAIモデルの選択肢を提供するために特別に設計されています：

- あなたは組織内のユーザーに様々なAIモデルの選択肢を提供できます
- あなたはコード変更なしで急速に進化するLLM環境に迅速に適応できます
- あなたは事前定義リストにない専門的またはファインチューニングされたモデルをサポートできます

サービスプロバイダーは、事前定義されたオプションから選択するか、フロントエンドインターフェースでカスタムモデル識別子を入力することで、モデル提供を実装できます。

### エンタープライズプライベートチャネル向けのベースURL設定

OpenAIクライアントのbase_url設定は、主にプライベートAPIチャネルを持つエンタープライズユーザー向けに設計されています。この機能は：

- プライベートまたは企業固有のAPIエンドポイントへの接続を可能に
- 組織が自己ホスト型または独自にデプロイされたLLMサービスを使用可能に
- サードパーティのOpenAI API互換サービスとの統合をサポート

**近日公開**: 将来のアップデートでは、ユーザーがリクエストで自分のAPI鍵を提供する必要があるモードをDeepWikiがサポートする予定です。これにより、プライベートチャネルを持つエンタープライズ顧客は、DeepWikiデプロイメントと認証情報を共有することなく、既存のAPI設定を使用できるようになります。

## 🔌 OpenRouter 連携

DeepWiki は、モデルプロバイダーとして OpenRouter をサポートするようになり、単一の API を通じて数百の AI モデルにアクセスできるようになりました。

- 複数のモデルオプション: OpenAI、Anthropic、Google、Meta、Mistralなど、統一APIを通じて複数のモデルにアクセス
- 簡単な設定: OpenRouter API キーを追加し、使用したいモデルを選択するだけ
- コスト効率: 予算とパフォーマンスのニーズに合ったモデルを選択
- 簡単な切り替え: コードを変更することなく、異なるモデル間を切り替え可能

### DeepWiki で OpenRouter を使用する方法

1. API キーを取得: OpenRouter でサインアップし、API キーを取得します
2. 環境に追加: `.env` ファイルに `OPENROUTER_API_KEY=your_key` を追加します
3. UI で有効化: ホームページの「OpenRouter API を使用」オプションをチェックします
4. モデルを選択: GPT-4o、Claude 3.5 Sonnet、Gemini 2.0 などの人気モデルから選択します

OpenRouter は特に以下のような場合に便利です：

- 複数のサービスにサインアップせずに異なるモデルを試したい
- お住まいの地域で制限されている可能性のあるモデルにアクセスしたい
- 異なるモデルプロバイダー間でパフォーマンスを比較したい
- ニーズに基づいてコストとパフォーマンスを最適化したい

## 🤖 質問と詳細調査機能

### 質問機能

質問機能を使用すると、検索拡張生成（RAG）を使用してリポジトリとチャットできます：

- **コンテキスト対応の回答**: リポジトリの実際のコードに基づいた正確な回答を取得
- **RAG 搭載**: システムは関連するコードスニペットを取得して根拠のある回答を提供
- **リアルタイムストリーミング**: よりインタラクティブな体験のために、生成されるレスポンスをリアルタイムで確認
- **会話履歴**: システムは質問間のコンテキストを維持し、より一貫性のあるインタラクションを実現

### 詳細調査機能

詳細調査は、複数ターンの研究プロセスでリポジトリ分析を次のレベルに引き上げます：

- **詳細な調査**: 複数の研究反復を通じて複雑なトピックを徹底的に探索
- **構造化されたプロセス**: 明確な研究計画、更新、包括的な結論を含む
- **自動継続**: AI は結論に達するまで自動的に研究を継続（最大 5 回の反復）
- **研究段階**:
  1. **研究計画**: アプローチと初期調査結果の概要
  2. **研究更新**: 新しい洞察を加えて前の反復を発展
  3. **最終結論**: すべての反復に基づく包括的な回答を提供

詳細調査を使用するには、質問を送信する前に質問インターフェースの「詳細調査」スイッチをオンにするだけです。

## 📱 スクリーンショット

![DeepWikiメインインターフェース](screenshots/Interface.png)
_DeepWiki のメインインターフェース_

![プライベートリポジトリサポート](screenshots/privaterepo.png)
_個人アクセストークンを使用したプライベートリポジトリへのアクセス_

![詳細調査機能](screenshots/DeepResearch.png)
_詳細調査は複雑なトピックに対して多段階の調査を実施_

### デモビデオ

[![DeepWikiデモビデオ](https://img.youtube.com/vi/zGANs8US8B4/0.jpg)](https://youtu.be/zGANs8US8B4)

_DeepWiki の動作を見る！_

## ❓ トラブルシューティング

### API キーの問題

- **「環境変数が見つかりません」**: `.env`ファイルがプロジェクトのルートにあり、必要な API キーが含まれていることを確認
- **「API キーが無効です」**: キー全体が余分なスペースなしで正しくコピーされていることを確認
- **「OpenRouter API エラー」**: OpenRouter API キーが有効で、十分なクレジットがあることを確認

### 接続の問題

- **「API サーバーに接続できません」**: API サーバーがポート 8001 で実行されていることを確認
- **「CORS エラー」**: API はすべてのオリジンを許可するように設定されていますが、問題がある場合は、フロントエンドとバックエンドを同じマシンで実行してみてください

### 生成の問題

- **「Wiki の生成中にエラーが発生しました」**: 非常に大きなリポジトリの場合は、まず小さいものから試してみてください
- **「無効なリポジトリ形式」**: 有効な GitHub、GitLab、または Bitbucket URL の形式を使用していることを確認
- **「リポジトリ構造を取得できませんでした」**: プライベートリポジトリの場合、適切な権限を持つ有効な個人アクセストークンを入力したことを確認
- **「図のレンダリングエラー」**: アプリは自動的に壊れた図を修正しようとします

### 一般的な解決策

1. **両方のサーバーを再起動**: 単純な再起動でほとんどの問題が解決することがあります
2. **コンソールログを確認**: ブラウザの開発者ツールを開いて JavaScript エラーを確認
3. **API ログを確認**: API が実行されているターミナルで Python エラーを確認

## 🤝 貢献

貢献は歓迎します！以下のことを自由に行ってください：

- バグや機能リクエストの問題を開く
- コードを改善するためのプルリクエストを提出
- フィードバックやアイデアを共有

## 📄 ライセンス

このプロジェクトは MIT ライセンスの下でライセンスされています - 詳細は[LICENSE](LICENSE)ファイルを参照してください。

## ⭐ スター履歴

[![スター履歴チャート](https://api.star-history.com/svg?repos=AsyncFuncAI/deepwiki-open&type=Date)](https://star-history.com/#AsyncFuncAI/deepwiki-open&Date)


================================================
FILE: README.kr.md
================================================
# DeepWiki-Open

![DeepWiki Banner](screenshots/Deepwiki.png)

**DeepWiki**는 제가 직접 구현한 프로젝트로, GitHub, GitLab 또는 Bitbucket 저장소에 대해 아름답고 대화형 위키를 자동 생성합니다! 저장소 이름만 입력하면 DeepWiki가 다음을 수행합니다:

1. 코드 구조 분석
2. 포괄적인 문서 생성
3. 모든 작동 방식을 설명하는 시각적 다이어그램 생성
4. 이를 쉽게 탐색할 수 있는 위키로 정리

[!["Buy Me A Coffee"](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)](https://buymeacoffee.com/sheing)

[![Twitter/X](https://img.shields.io/badge/Twitter-1DA1F2?style=for-the-badge&logo=twitter&logoColor=white)](https://x.com/sashimikun_void)
[![Discord](https://img.shields.io/badge/Discord-7289DA?style=for-the-badge&logo=discord&logoColor=white)](https://discord.com/invite/VQMBGR8u5v)

[English](./README.md) | [简体中文](./README.zh.md) | [繁體中文](./README.zh-tw.md) | [日本語](./README.ja.md) | [Español](./README.es.md) | [한국어](./README.kr.md) | [Tiếng Việt](./README.vi.md) | [Português Brasileiro](./README.pt-br.md) | [Français](./README.fr.md) | [Русский](./README.ru.md)

## ✨ 주요 기능

- **즉시 문서화**: 어떤 GitHub, GitLab 또는 Bitbucket 저장소든 몇 초 만에 위키로 변환
- **비공개 저장소 지원**: 개인 액세스 토큰으로 비공개 저장소 안전하게 접근
- **스마트 분석**: AI 기반 코드 구조 및 관계 이해
- **아름다운 다이어그램**: 아키텍처와 데이터 흐름을 시각화하는 자동 Mermaid 다이어그램
- **쉬운 탐색**: 간단하고 직관적인 인터페이스로 위키 탐색 가능
- **Ask 기능**: RAG 기반 AI와 저장소에 대해 대화하며 정확한 답변 얻기
- **DeepResearch**: 복잡한 주제를 철저히 조사하는 다중 턴 연구 프로세스
- **다양한 모델 제공자 지원**: Google Gemini, OpenAI, OpenRouter, 로컬 Ollama 모델 지원

## 🚀 빠른 시작 (초간단!)

### 옵션 1: Docker 사용

```bash
# 저장소 클론
git clone https://github.com/AsyncFuncAI/deepwiki-open.git
cd deepwiki-open

# API 키를 포함한 .env 파일 생성
echo "GOOGLE_API_KEY=your_google_api_key" > .env
echo "OPENAI_API_KEY=your_openai_api_key" >> .env
# 선택 사항: OpenRouter 모델 사용 시 API 키 추가
echo "OPENROUTER_API_KEY=your_openrouter_api_key" >> .env

# Docker Compose로 실행
docker-compose up
```

> 💡 **API 키는 어디서 얻나요:**
> - [Google AI Studio](https://makersuite.google.com/app/apikey)에서 Google API 키 받기
> - [OpenAI 플랫폼](https://platform.openai.com/api-keys)에서 OpenAI API 키 받기

### 옵션 2: 수동 설정 (권장)

#### 1단계: API 키 설정

프로젝트 루트에 `.env` 파일을 만들고 다음 키들을 추가하세요:

```
GOOGLE_API_KEY=your_google_api_key
OPENAI_API_KEY=your_openai_api_key
# 선택 사항: OpenRouter 모델 사용 시 추가
OPENROUTER_API_KEY=your_openrouter_api_key
```

#### 2단계: 백엔드 시작

```bash
# Python 의존성 설치
python -m pip install poetry==2.0.1 && poetry install -C api

# API 서버 실행
python -m api.main
```

#### 3단계: 프론트엔드 시작

```bash
# JavaScript 의존성 설치
npm install
# 또는
yarn install

# 웹 앱 실행
npm run dev
# 또는
yarn dev
```

#### 4단계: DeepWiki 사용하기!

1. 브라우저에서 [http://localhost:3000](http://localhost:3000) 열기
2. GitHub, GitLab 또는 Bitbucket 저장소 입력 (예: `https://github.com/openai/codex`, `https://github.com/microsoft/autogen`, `https://gitlab.com/gitlab-org/gitlab`, `https://bitbucket.org/redradish/atlassian_app_versions`)
3. 비공개 저장소인 경우 "+ 액세스 토큰 추가" 클릭 후 GitHub 또는 GitLab 개인 액세스 토큰 입력
4. "Generate Wiki" 클릭 후 마법을 지켜보기!

## 🔍 작동 방식

DeepWiki는 AI를 사용하여 다음을 수행합니다:

1. GitHub, GitLab 또는 Bitbucket 저장소 복제 및 분석 (토큰 인증이 필요한 비공개 저장소 포함)
2. 스마트 검색을 위한 코드 임베딩 생성
3. 문맥 인지 AI로 문서 생성 (Google Gemini, OpenAI, OpenRouter 또는 로컬 Ollama 모델 사용)
4. 코드 관계를 설명하는 시각적 다이어그램 생성
5. 모든 것을 구조화된 위키로 정리
6. Ask 기능을 통한 저장소와의 지능형 Q&A 지원
7. DeepResearch로 심층 연구 기능 제공

```mermaid
graph TD
    A[사용자가 GitHub/GitLab/Bitbucket 저장소 입력] --> AA{비공개 저장소인가?}
    AA -->|예| AB[액세스 토큰 추가]
    AA -->|아니오| B[저장소 복제]
    AB --> B
    B --> C[코드 구조 분석]
    C --> D[코드 임베딩 생성]

    D --> M{모델 제공자 선택}
    M -->|Google Gemini| E1[Gemini로 생성]
    M -->|OpenAI| E2[OpenAI로 생성]
    M -->|OpenRouter| E3[OpenRouter로 생성]
    M -->|로컬 Ollama| E4[Ollama로 생성]

    E1 --> E[문서 생성]
    E2 --> E
    E3 --> E
    E4 --> E

    D --> F[시각적 다이어그램 생성]
    E --> G[위키로 정리]
    F --> G
    G --> H[대화형 DeepWiki]

    classDef process stroke-width:2px;
    classDef data stroke-width:2px;
    classDef result stroke-width:2px;
    classDef decision stroke-width:2px;

    class A,D data;
    class AA,M decision;
    class B,C,E,F,G,AB,E1,E2,E3,E4 process;
    class H result;
```

## 🛠️ 프로젝트 구조

```
deepwiki/
├── api/                  # 백엔드 API 서버
│   ├── main.py           # API 진입점
│   ├── api.py            # FastAPI 구현
│   ├── rag.py            # Retrieval Augmented Generation
│   ├── data_pipeline.py  # 데이터 처리 유틸리티
│   └── pyproject.toml    # Python 의존성 (Poetry)
│
├── src/                  # 프론트엔드 Next.js 앱
│   ├── app/              # Next.js 앱 디렉토리
│   │   └── page.tsx      # 메인 애플리케이션 페이지
│   └── components/       # React 컴포넌트
│       └── Mermaid.tsx   # Mermaid 다이어그램 렌더러
│
├── public/               # 정적 자산
├── package.json          # JavaScript 의존성
└── .env                  # 환경 변수 (직접 생성)
```

## 🛠️ 고급 설정

### 환경 변수

| 변수명 | 설명 | 필수 | 비고 |
|----------|-------------|----------|------|
| `GOOGLE_API_KEY` | AI 생성용 Google Gemini API 키 | 예 | |
| `OPENAI_API_KEY` | 임베딩용 OpenAI API 키 | 예 | |
| `OPENROUTER_API_KEY` | 대체 모델용 OpenRouter API 키 | 아니오 | OpenRouter 모델 사용 시 필요 |
| `PORT` | API 서버 포트 (기본값: 8001) | 아니오 | API와 프론트엔드를 같은 머신에서 호스팅 시 `SERVER_BASE_URL`의 포트도 변경 필요 |
| `SERVER_BASE_URL` | API 서버 기본 URL (기본값: http://localhost:8001) | 아니오 | |

### 설정 파일

DeepWiki는 시스템의 다양한 측면을 관리하기 위해 JSON 설정 파일을 사용합니다:

1. **`generator.json`**: 텍스트 생성 모델 설정
   - 사용 가능한 모델 제공자(Google, OpenAI, OpenRouter, Ollama) 정의
   - 각 제공자의 기본 및 사용 가능한 모델 지정
   - temperature와 top_p 같은 모델별 매개변수 포함

2. **`embedder.json`**: 임베딩 모델 및 텍스트 처리 설정
   - 벡터 저장소용 임베딩 모델 정의
   - RAG를 위한 검색기 설정 포함
   - 문서 청킹을 위한 텍스트 분할기 설정 지정

3. **`repo.json`**: 저장소 처리 설정
   - 특정 파일 및 디렉토리를 제외하는 파일 필터 포함
   - 저장소 크기 제한 및 처리 규칙 정의

기본적으로 이러한 파일은 `api/config/` 디렉토리에 위치합니다. `DEEPWIKI_CONFIG_DIR` 환경 변수를 사용하여 위치를 사용자 정의할 수 있습니다.

### Docker 설정

Docker를 사용하여 DeepWiki를 실행할 수 있습니다:

```bash
# GitHub 컨테이너 레지스트리에서 이미지 가져오기
docker pull ghcr.io/asyncfuncai/deepwiki-open:latest

# 환경 변수와 함께 컨테이너 실행
docker run -p 8001:8001 -p 3000:3000 \
  -e GOOGLE_API_KEY=your_google_api_key \
  -e OPENAI_API_KEY=your_openai_api_key \
  -e OPENROUTER_API_KEY=your_openrouter_api_key \
  -v ~/.adalflow:/root/.adalflow \
  ghcr.io/asyncfuncai/deepwiki-open:latest
```

이 명령어는 또한 호스트의 `~/.adalflow`를 컨테이너의 `/root/.adalflow`에 마운트합니다. 이 경로는 다음을 저장하는 데 사용됩니다:
- 복제된 저장소 (`~/.adalflow/repos/`)
- 해당 저장소의 임베딩 및 인덱스 (`~/.adalflow/databases/`)
- 생성된 위키의 캐시 (`~/.adalflow/wikicache/`)

이를 통해 컨테이너가 중지되거나 제거되어도 데이터가 유지됩니다.

또는 제공된 `docker-compose.yml` 파일을 사용하세요:

```bash
# API 키가 포함된 .env 파일을 먼저 편집
docker-compose up
```

(`docker-compose.yml` 파일은 위의 `docker run` 명령어와 유사하게 데이터 지속성을 위해 `~/.adalflow`를 마운트하도록 미리 구성되어 있습니다.)

#### Docker에서 .env 파일 사용하기

.env 파일을 컨테이너에 마운트할 수도 있습니다:

```bash
# API 키가 포함된 .env 파일 생성
echo "GOOGLE_API_KEY=your_google_api_key" > .env
echo "OPENAI_API_KEY=your_openai_api_key" >> .env
echo "OPENROUTER_API_KEY=your_openrouter_api_key" >> .env

# .env 파일을 마운트하여 컨테이너 실행
docker run -p 8001:8001 -p 3000:3000 \
  -v $(pwd)/.env:/app/.env \
  -v ~/.adalflow:/root/.adalflow \
  ghcr.io/asyncfuncai/deepwiki-open:latest
```

이 명령어는 또한 호스트의 `~/.adalflow`를 컨테이너의 `/root/.adalflow`에 마운트합니다. 이 경로는 다음을 저장하는 데 사용됩니다:
- 복제된 저장소 (`~/.adalflow/repos/`)
- 해당 저장소의 임베딩 및 인덱스 (`~/.adalflow/databases/`)
- 생성된 위키의 캐시 (`~/.adalflow/wikicache/`)

이를 통해 컨테이너가 중지되거나 제거되어도 데이터가 유지됩니다.

#### 로컬에서 Docker 이미지 빌드하기

로컬에서 Docker 이미지를 빌드하려면:

```bash
# 저장소 클론
git clone https://github.com/AsyncFuncAI/deepwiki-open.git
cd deepwiki-open

# Docker 이미지 빌드
docker build -t deepwiki-open .

# 컨테이너 실행
docker run -p 8001:8001 -p 3000:3000 \
  -e GOOGLE_API_KEY=your_google_api_key \
  -e OPENAI_API_KEY=your_openai_api_key \
  -e OPENROUTER_API_KEY=your_openrouter_api_key \
  deepwiki-open
```

### API 서버 상세 정보

API 서버는 다음을 제공합니다:
- 저장소 복제 및 인덱싱
- RAG (Retrieval Augmented Generation)
- 스트리밍 채팅 완성

자세한 내용은 [API README](./api/README.md)를 참조하세요.

## 🤖 제공자 기반 모델 선택 시스템

DeepWiki는 이제 여러 LLM 제공자를 지원하는 유연한 제공자 기반 모델 선택 시스템을 구현했습니다:

### 지원되는 제공자 및 모델

- **Google**: 기본값 `gemini-2.5-flash`, 또한 `gemini-2.5-flash-lite`, `gemini-2.5-pro` 등도 지원
- **OpenAI**: 기본값 `gpt-5-nano`, 또한 `gpt-5`, `gpt-4o` 등도 지원
- **OpenRouter**: Claude, Llama, Mistral 등 통합 API를 통해 다양한 모델 접근 가능
- **Ollama**: `llama3`와 같은 로컬에서 실행되는 오픈소스 모델 지원

### 환경 변수

각 제공자는 해당 API 키 환경 변수가 필요합니다:

```
# API 키
GOOGLE_API_KEY=귀하의_구글_API_키        # Google Gemini 모델에 필요
OPENAI_API_KEY=귀하의_OpenAI_키         # OpenAI 모델에 필요
OPENROUTER_API_KEY=귀하의_OpenRouter_키 # OpenRouter 모델에 필요

# OpenAI API 기본 URL 구성
OPENAI_BASE_URL=https://사용자정의_API_엔드포인트.com/v1  # 선택 사항, 사용자 정의 OpenAI API 엔드포인트용
```

### 서비스 제공자를 위한 사용자 정의 모델 선택

사용자 정의 모델 선택 기능은 다음이 필요한 서비스 제공자를 위해 특별히 설계되었습니다:

- 귀하는 조직 내 사용자에게 다양한 AI 모델 선택 옵션을 제공할 수 있습니다
- 귀하는 코드 변경 없이 빠르게 진화하는 LLM 환경에 신속하게 적응할 수 있습니다
- 귀하는 사전 정의된 목록에 없는 특수하거나 미세 조정된 모델을 지원할 수 있습니다

서비스 제공자는 사전 정의된 옵션에서 선택하거나 프론트엔드 인터페이스에서 사용자 정의 모델 식별자를 입력하여 모델 제공을 구현할 수 있습니다.

### 기업 전용 채널을 위한 기본 URL 구성

OpenAI 클라이언트의 base_url 구성은 주로 비공개 API 채널이 있는 기업 사용자를 위해 설계되었습니다. 이 기능은:

- 비공개 또는 기업 전용 API 엔드포인트 연결 가능
- 조직이 자체 호스팅되거나 사용자 정의 배포된 LLM 서비스 사용 가능
- 서드파티 OpenAI API 호환 서비스와의 통합 지원

**출시 예정**: 향후 업데이트에서 DeepWiki는 사용자가 요청에서 자신의 API 키를 제공해야 하는 모드를 지원할 예정입니다. 이를 통해 비공개 채널이 있는 기업 고객은 DeepWiki 배포와 자격 증명을 공유하지 않고도 기존 API 구성을 사용할 수 있습니다.

## 🔌 OpenRouter 통합

DeepWiki는 이제 [OpenRouter](https://openrouter.ai/)를 모델 제공자로 지원하여, 단일 API를 통해 수백 개의 AI 모델에 접근할 수 있습니다:

- **다양한 모델 옵션**: OpenAI, Anthropic, Google, Meta, Mistral 등 다양한 모델 이용 가능
- **간편한 설정**: OpenRouter API 키만 추가하고 원하는 모델 선택
- **비용 효율성**: 예산과 성능에 맞는 모델 선택 가능
- **손쉬운 전환**: 코드 변경 없이 다양한 모델 간 전환 가능

### DeepWiki에서 OpenRouter 사용법

1. **API 키 받기**: [OpenRouter](https://openrouter.ai/) 가입 후 API 키 획득
2. **환경 변수 추가**: `.env` 파일에 `OPENROUTER_API_KEY=your_key` 추가
3. **UI에서 활성화**: 홈페이지에서 "Use OpenRouter API" 옵션 체크
4. **모델 선택**: GPT-4o, Claude 3.5 Sonnet, Gemini 2.0 등 인기 모델 선택

OpenRouter는 특히 다음과 같은 경우 유용합니다:
- 여러 서비스에 가입하지 않고 다양한 모델 시도
- 지역 제한이 있는 모델 접근
- 모델 제공자별 성능 비교
- 비용과 성능 최적화

## 🤖 Ask 및 DeepResearch 기능

### Ask 기능

Ask 기능은 Retrieval Augmented Generation (RAG)을 사용해 저장소와 대화할 수 있습니다:

- **문맥 인지 답변**: 저장소 내 실제 코드 기반으로 정확한 답변 제공
- **RAG 기반**: 관련 코드 조각을 검색해 근거 있는 답변 생성
- **실시간 스트리밍**: 답변 생성 과정을 실시간으로 확인 가능
- **대화 기록 유지**: 질문 간 문맥을 유지해 더 일관된 대화 가능

### DeepResearch 기능

DeepResearch는 다중 턴 연구 프로세스를 통해 저장소 분석을 한층 심화합니다:

- **심층 조사**: 여러 연구 반복을 통해 복잡한 주제 철저히 탐구
- **구조화된 프로세스**: 연구 계획, 업데이트, 최종 결론 단계로 진행
- **자동 연속 진행**: AI가 최대 5회 반복해 연구를 계속 진행
- **연구 단계**:
  1. **연구 계획**: 접근법과 초기 발견 사항 개요 작성
  2. **연구 업데이트**: 이전 반복 내용을 바탕으로 새로운 통찰 추가
  3. **최종 결론**: 모든 반복을 종합한 포괄적 답변 제공

DeepResearch를 사용하려면 질문 제출 전 Ask 인터페이스에서 "Deep Research" 스위치를 켜세요.

## 📱 스크린샷

![DeepWiki Main Interface](screenshots/Interface.png)
*DeepWiki의 메인 인터페이스*

![Private Repository Support](screenshots/privaterepo.png)
*개인 액세스 토큰으로 비공개 저장소 접근*

![DeepResearch Feature](screenshots/DeepResearch.png)
*DeepResearch는 복잡한 주제에 대해 다중 턴 조사를 수행*

### 데모 영상

[![DeepWiki Demo Video](https://img.youtube.com/vi/zGANs8US8B4/0.jpg)](https://youtu.be/zGANs8US8B4)

*DeepWiki 작동 영상 보기!*

## ❓ 문제 해결

### API 키 문제
- **"환경 변수 누락"**: `.env` 파일이 프로젝트 루트에 있고 필요한 API 키가 포함되어 있는지 확인
- **"API 키가 유효하지 않음"**: 키를 정확히 복사했는지, 공백이 없는지 확인
- **"OpenRouter API 오류"**: OpenRouter API 키가 유효하고 충분한 크레딧이 있는지 확인

### 연결 문제
- **"API 서버에 연결할 수 없음"**: API 서버가 포트 8001에서 실행 중인지 확인
- **"CORS 오류"**: API가 모든 출처를 허용하도록 설정되어 있지만 문제가 있으면 프론트엔드와 백엔드를 같은 머신에서 실행해 보세요

### 생성 문제
- **"위키 생성 오류"**: 아주 큰 저장소는 먼저 작은 저장소로 시도해 보세요
- **"잘못된 저장소 형식"**: 유효한 GitHub, GitLab 또는 Bitbucket URL 형식인지 확인
- **"저장소 구조를 가져올 수 없음"**: 비공개 저장소라면 적절한 권한의 개인 액세스 토큰을 입력했는지 확인
- **"다이어그램 렌더링 오류"**: 앱이 자동으로 다이어그램 오류를 수정하려 시도합니다

### 일반적인 해결법
1. **서버 둘 다 재시작**: 간단한 재시작으로 대부분 문제 해결
2. **콘솔 로그 확인**: 브라우저 개발자 도구에서 자바스크립트 오류 확인
3. **API 로그 확인**: API 실행 터미널에서 Python 오류 확인

## 🤝 기여

기여를 환영합니다! 다음을 자유롭게 해주세요:
- 버그나 기능 요청을 위한 이슈 열기
- 코드 개선을 위한 풀 리퀘스트 제출
- 피드백과 아이디어 공유

## 📄 라이선스

이 프로젝트는 MIT 라이선스 하에 있습니다 - 자세한 내용은 [LICENSE](LICENSE) 파일 참고.

## ⭐ 스타 히스토리

[![Star History Chart](https://api.star-history.com/svg?repos=AsyncFuncAI/deepwiki-open&type=Date)](https://star-history.com/#AsyncFuncAI/deepwiki-open&Date)


================================================
FILE: README.md
================================================

### ⚠️ Announcement: Shifting focus to AsyncReview
---

**IMPORTANT UPDATE** DeepWiki-Open maintenance is ongoing, but primary active development is moving to **[AsyncReview](https://github.com/AsyncFuncAI/AsyncReview/)**. Thank you for the support on this project; please join me in the new repository for this year's primary effort.

---
---

# DeepWiki-Open

![DeepWiki Banner](screenshots/Deepwiki.png)

**DeepWiki** is my own implementation attempt of DeepWiki. It automatically creates beautiful, interactive wikis for any GitHub, GitLab, or Bitbucket repository! Just enter a repo name, and DeepWiki will:

1. Analyze the code structure
2. Generate comprehensive documentation
3. Create visual diagrams to explain how everything works
4. Organize it all into an easy-to-navigate wiki

[!["Buy Me A Coffee"](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)](https://buymeacoffee.com/sheing)
[![Tip in Crypto](https://tip.md/badge.svg)](https://tip.md/sng-asyncfunc)
[![Twitter/X](https://img.shields.io/badge/Twitter-1DA1F2?style=for-the-badge&logo=twitter&logoColor=white)](https://x.com/sashimikun_void)
[![Discord](https://img.shields.io/badge/Discord-7289DA?style=for-the-badge&logo=discord&logoColor=white)](https://discord.com/invite/VQMBGR8u5v)

[English](./README.md) | [简体中文](./README.zh.md) | [繁體中文](./README.zh-tw.md) | [日本語](./README.ja.md) | [Español](./README.es.md) | [한국어](./README.kr.md) | [Tiếng Việt](./README.vi.md) | [Português Brasileiro](./README.pt-br.md) | [Français](./README.fr.md) | [Русский](./README.ru.md)

## ✨ Features

- **Instant Documentation**: Turn any GitHub, GitLab, or Bitbucket repo into a wiki in seconds
- **Private Repository Support**: Securely access private repositories with personal access tokens
- **Smart Analysis**: AI-powered understanding of code structure and relationships
- **Beautiful Diagrams**: Automatic Mermaid diagrams to visualize architecture and data flow
- **Easy Navigation**: Simple, intuitive interface to explore the wiki
- **Ask Feature**: Chat with your repository using RAG-powered AI to get accurate answers
- **DeepResearch**: Multi-turn research process that thoroughly investigates complex topics
- **Multiple Model Providers**: Support for Google Gemini, OpenAI, OpenRouter, and local Ollama models
- **Flexible Embeddings**: Choose between OpenAI, Google AI, or local Ollama embeddings for optimal performance

## 🚀 Quick Start (Super Easy!)

### Option 1: Using Docker

```bash
# Clone the repository
git clone https://github.com/AsyncFuncAI/deepwiki-open.git
cd deepwiki-open

# Create a .env file with your API keys
echo "GOOGLE_API_KEY=your_google_api_key" > .env
echo "OPENAI_API_KEY=your_openai_api_key" >> .env
# Optional: Use Google AI embeddings instead of OpenAI (recommended if using Google models)
echo "DEEPWIKI_EMBEDDER_TYPE=google" >> .env
# Optional: Add OpenRouter API key if you want to use OpenRouter models
echo "OPENROUTER_API_KEY=your_openrouter_api_key" >> .env
# Optional: Add Ollama host if not local. Defaults to http://localhost:11434
echo "OLLAMA_HOST=your_ollama_host" >> .env
# Optional: Add Azure API key, endpoint and version if you want to use azure openai models
echo "AZURE_OPENAI_API_KEY=your_azure_openai_api_key" >> .env
echo "AZURE_OPENAI_ENDPOINT=your_azure_openai_endpoint" >> .env
echo "AZURE_OPENAI_VERSION=your_azure_openai_version" >> .env
# Run with Docker Compose
docker-compose up
```

For detailed instructions on using DeepWiki with Ollama and Docker, see [Ollama Instructions](Ollama-instruction.md).

> 💡 **Where to get these keys:**
> - Get a Google API key from [Google AI Studio](https://makersuite.google.com/app/apikey)
> - Get an OpenAI API key from [OpenAI Platform](https://platform.openai.com/api-keys)
> - Get Azure OpenAI credentials from [Azure Portal](https://portal.azure.com/) - create an Azure OpenAI resource and get the API key, endpoint, and API version

### Option 2: Manual Setup (Recommended)

#### Step 1: Set Up Your API Keys

Create a `.env` file in the project root with these keys:

```
GOOGLE_API_KEY=your_google_api_key
OPENAI_API_KEY=your_openai_api_key
# Optional: Use Google AI embeddings (recommended if using Google models)
DEEPWIKI_EMBEDDER_TYPE=google
# Optional: Add this if you want to use OpenRouter models
OPENROUTER_API_KEY=your_openrouter_api_key
# Optional: Add this if you want to use Azure OpenAI models
AZURE_OPENAI_API_KEY=your_azure_openai_api_key
AZURE_OPENAI_ENDPOINT=your_azure_openai_endpoint
AZURE_OPENAI_VERSION=your_azure_openai_version
# Optional: Add Ollama host if not local. default: http://localhost:11434
OLLAMA_HOST=your_ollama_host
```

#### Step 2: Start the Backend

```bash
# Install Python dependencies
python -m pip install poetry==2.0.1 && poetry install -C api

# Start the API server
python -m api.main
```

#### Step 3: Start the Frontend

```bash
# Install JavaScript dependencies
npm install
# or
yarn install

# Start the web app
npm run dev
# or
yarn dev
```

#### Step 4: Use DeepWiki!

1. Open [http://localhost:3000](http://localhost:3000) in your browser
2. Enter a GitHub, GitLab, or Bitbucket repository (like `https://github.com/openai/codex`, `https://github.com/microsoft/autogen`, `https://gitlab.com/gitlab-org/gitlab`, or `https://bitbucket.org/redradish/atlassian_app_versions`)
3. For private repositories, click "+ Add access tokens" and enter your GitHub or GitLab personal access token
4. Click "Generate Wiki" and watch the magic happen!

## 🔍 How It Works

DeepWiki uses AI to:

1. Clone and analyze the GitHub, GitLab, or Bitbucket repository (including private repos with token authentication)
2. Create embeddings of the code for smart retrieval
3. Generate documentation with context-aware AI (using Google Gemini, OpenAI, OpenRouter, Azure OpenAI, or local Ollama models)
4. Create visual diagrams to explain code relationships
5. Organize everything into a structured wiki
6. Enable intelligent Q&A with the repository through the Ask feature
7. Provide in-depth research capabilities with DeepResearch

```mermaid
graph TD
    A[User inputs GitHub/GitLab/Bitbucket repo] --> AA{Private repo?}
    AA -->|Yes| AB[Add access token]
    AA -->|No| B[Clone Repository]
    AB --> B
    B --> C[Analyze Code Structure]
    C --> D[Create Code Embeddings]

    D --> M{Select Model Provider}
    M -->|Google Gemini| E1[Generate with Gemini]
    M -->|OpenAI| E2[Generate with OpenAI]
    M -->|OpenRouter| E3[Generate with OpenRouter]
    M -->|Local Ollama| E4[Generate with Ollama]
    M -->|Azure| E5[Generate with Azure]

    E1 --> E[Generate Documentation]
    E2 --> E
    E3 --> E
    E4 --> E
    E5 --> E

    D --> F[Create Visual Diagrams]
    E --> G[Organize as Wiki]
    F --> G
    G --> H[Interactive DeepWiki]

    classDef process stroke-width:2px;
    classDef data stroke-width:2px;
    classDef result stroke-width:2px;
    classDef decision stroke-width:2px;

    class A,D data;
    class AA,M decision;
    class B,C,E,F,G,AB,E1,E2,E3,E4,E5 process;
    class H result;
```

## 🛠️ Project Structure

```
deepwiki/
├── api/                  # Backend API server
│   ├── main.py           # API entry point
│   ├── api.py            # FastAPI implementation
│   ├── rag.py            # Retrieval Augmented Generation
│   ├── data_pipeline.py  # Data processing utilities
│   ├── pyproject.toml    # Python dependencies (Poetry)
│   └── poetry.lock       # Locked Python dependency versions
│
├── src/                  # Frontend Next.js app
│   ├── app/              # Next.js app directory
│   │   └── page.tsx      # Main application page
│   └── components/       # React components
│       └── Mermaid.tsx   # Mermaid diagram renderer
│
├── public/               # Static assets
├── package.json          # JavaScript dependencies
└── .env                  # Environment variables (create this)
```

## 🤖 Provider-Based Model Selection System

DeepWiki now implements a flexible provider-based model selection system supporting multiple LLM providers:

### Supported Providers and Models

- **Google**: Default `gemini-2.5-flash`, also supports `gemini-2.5-flash-lite`, `gemini-2.5-pro`, etc.
- **OpenAI**: Default `gpt-5-nano`, also supports `gpt-5`, `gpt-4o`, etc.
- **OpenRouter**: Access to multiple models via a unified API, including Claude, Llama, Mistral, etc.
- **Azure OpenAI**: Default `gpt-4o`, also supports `o4-mini`, etc.
- **Ollama**: Support for locally running open-source models like `llama3`

### Environment Variables

Each provider requires its corresponding API key environment variables:

```
# API Keys
GOOGLE_API_KEY=your_google_api_key        # Required for Google Gemini models
OPENAI_API_KEY=your_openai_api_key        # Required for OpenAI models
OPENROUTER_API_KEY=your_openrouter_api_key # Required for OpenRouter models
AZURE_OPENAI_API_KEY=your_azure_openai_api_key  # Required for Azure OpenAI models
AZURE_OPENAI_ENDPOINT=your_azure_openai_endpoint  # Required for Azure OpenAI models
AZURE_OPENAI_VERSION=your_azure_openai_version  # Required for Azure OpenAI models

# OpenAI API Base URL Configuration
OPENAI_BASE_URL=https://custom-api-endpoint.com/v1  # Optional, for custom OpenAI API endpoints

# Ollama host
OLLAMA_HOST=your_ollama_host # Optional, if Ollama is not local. default: http://localhost:11434

# Configuration Directory
DEEPWIKI_CONFIG_DIR=/path/to/custom/config/dir  # Optional, for custom config file location
```

### Configuration Files

DeepWiki uses JSON configuration files to manage various aspects of the system:

1. **`generator.json`**: Configuration for text generation models
   - Defines available model providers (Google, OpenAI, OpenRouter, Azure, Ollama)
   - Specifies default and available models for each provider
   - Contains model-specific parameters like temperature and top_p

2. **`embedder.json`**: Configuration for embedding models and text processing
   - Defines embedding models for vector storage
   - Contains retriever configuration for RAG
   - Specifies text splitter settings for document chunking

3. **`repo.json`**: Configuration for repository handling
   - Contains file filters to exclude certain files and directories
   - Defines repository size limits and processing rules

By default, these files are located in the `api/config/` directory. You can customize their location using the `DEEPWIKI_CONFIG_DIR` environment variable.

### Custom Model Selection for Service Providers

The custom model selection feature is specifically designed for service providers who need to:

- Offer multiple AI model choices to users within their organization
- Adapt quickly to the rapidly evolving LLM landscape without code changes
- Support specialized or fine-tuned models that aren't in the predefined list

Service providers can implement their model offerings by selecting from the predefined options or entering custom model identifiers in the frontend interface.

### Base URL Configuration for Enterprise Private Channels

The OpenAI Client's base_url configuration is designed primarily for enterprise users with private API channels. This feature:

- Enables connection to private or enterprise-specific API endpoints
- Allows organizations to use their own self-hosted or custom-deployed LLM services
- Supports integration with third-party OpenAI API-compatible services

**Coming Soon**: In future updates, DeepWiki will support a mode where users need to provide their own API keys in requests. This will allow enterprise customers with private channels to use their existing API arrangements without sharing credentials with the DeepWiki deployment.

## 🧩 Using OpenAI-Compatible Embedding Models (e.g., Alibaba Qwen)

If you want to use embedding models compatible with the OpenAI API (such as Alibaba Qwen), follow these steps:

1. Replace the contents of `api/config/embedder.json` with those from `api/config/embedder.openai_compatible.json.bak`.
2. In your project root `.env` file, set the relevant environment variables, for example:
   ```
   OPENAI_API_KEY=your_api_key
   OPENAI_BASE_URL=your_openai_compatible_endpoint
   ```
3. The program will automatically substitute placeholders in embedder.json with the values from your environment variables.

This allows you to seamlessly switch to any OpenAI-compatible embedding service without code changes.

## 🧠 Using Google AI Embeddings

DeepWiki now supports Google AI's latest embedding models as an alternative to OpenAI embeddings. This provides better integration when you're already using Google Gemini models for text generation.

### Features

- **Latest Model**: Uses Google's `text-embedding-004` model
- **Same API Key**: Uses your existing `GOOGLE_API_KEY` (no additional setup required)
- **Better Integration**: Optimized for use with Google Gemini text generation models
- **Task-Specific**: Supports semantic similarity, retrieval, and classification tasks
- **Batch Processing**: Efficient processing of multiple texts

### How to Enable Google AI Embeddings

**Option 1: Environment Variable (Recommended)**

Set the embedder type in your `.env` file:

```bash
# Your existing Google API key
GOOGLE_API_KEY=your_google_api_key

# Enable Google AI embeddings
DEEPWIKI_EMBEDDER_TYPE=google
```

**Option 2: Docker Environment**

```bash
docker run -p 8001:8001 -p 3000:3000 \
  -e GOOGLE_API_KEY=your_google_api_key \
  -e DEEPWIKI_EMBEDDER_TYPE=google \
  -v ~/.adalflow:/root/.adalflow \
  ghcr.io/asyncfuncai/deepwiki-open:latest
```

**Option 3: Docker Compose**

Add to your `.env` file:

```bash
GOOGLE_API_KEY=your_google_api_key
DEEPWIKI_EMBEDDER_TYPE=google
```

Then run:

```bash
docker-compose up
```

### Available Embedder Types

| Type | Description | API Key Required | Notes |
|------|-------------|------------------|-------|
| `openai` | OpenAI embeddings (default) | `OPENAI_API_KEY` | Uses `text-embedding-3-small` model |
| `google` | Google AI embeddings | `GOOGLE_API_KEY` | Uses `text-embedding-004` model |
| `ollama` | Local Ollama embeddings | None | Requires local Ollama installation |

### Why Use Google AI Embeddings?

- **Consistency**: If you're using Google Gemini for text generation, using Google embeddings provides better semantic consistency
- **Performance**: Google's latest embedding model offers excellent performance for retrieval tasks
- **Cost**: Competitive pricing compared to OpenAI
- **No Additional Setup**: Uses the same API key as your text generation models

### Switching Between Embedders

You can easily switch between different embedding providers:

```bash
# Use OpenAI embeddings (default)
export DEEPWIKI_EMBEDDER_TYPE=openai

# Use Google AI embeddings
export DEEPWIKI_EMBEDDER_TYPE=google

# Use local Ollama embeddings
export DEEPWIKI_EMBEDDER_TYPE=ollama
```

**Note**: When switching embedders, you may need to regenerate your repository embeddings as different models produce different vector spaces.

### Logging

DeepWiki uses Python's built-in `logging` module for diagnostic output. You can configure the verbosity and log file destination via environment variables:

| Variable        | Description                                                        | Default                      |
|-----------------|--------------------------------------------------------------------|------------------------------|
| `LOG_LEVEL`     | Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL).             | INFO                         |
| `LOG_FILE_PATH` | Path to the log file. If set, logs will be written to this file.   | `api/logs/application.log`   |

To enable debug logging and direct logs to a custom file:
```bash
export LOG_LEVEL=DEBUG
export LOG_FILE_PATH=./debug.log
python -m api.main
```
Or with Docker Compose:
```bash
LOG_LEVEL=DEBUG LOG_FILE_PATH=./debug.log docker-compose up
```

When running with Docker Compose, the container's `api/logs` directory is bind-mounted to `./api/logs` on your host (see the `volumes` section in `docker-compose.yml`), ensuring log files persist across restarts.

Alternatively, you can store these settings in your `.env` file:

```bash
LOG_LEVEL=DEBUG
LOG_FILE_PATH=./debug.log
```
Then simply run:

```bash
docker-compose up
```

**Logging Path Security Considerations:** In production environments, ensure the `api/logs` directory and any custom log file path are secured with appropriate filesystem permissions and access controls. The application enforces that `LOG_FILE_PATH` resides within the project's `api/logs` directory to prevent path traversal or unauthorized writes.

## 🛠️ Advanced Setup

### Environment Variables

| Variable             | Description                                                  | Required | Note                                                                                                     |
|----------------------|--------------------------------------------------------------|----------|----------------------------------------------------------------------------------------------------------|
| `GOOGLE_API_KEY`     | Google Gemini API key for AI generation and embeddings      | No | Required for Google Gemini models and Google AI embeddings                                               |
| `OPENAI_API_KEY`     | OpenAI API key for embeddings and models                     | Conditional | Required if using OpenAI embeddings or models                                                            |
| `OPENROUTER_API_KEY` | OpenRouter API key for alternative models                    | No | Required only if you want to use OpenRouter models                                                       |
| `AWS_ACCESS_KEY_ID`  | AWS access key ID for Bedrock                                 | No | Required for Bedrock if not using instance/role-based credentials                                        |
| `AWS_SECRET_ACCESS_KEY` | AWS secret access key for Bedrock                          | No | Required for Bedrock if not using instance/role-based credentials                                        |
| `AWS_SESSION_TOKEN`  | AWS session token for Bedrock (STS)                            | No | Required when using temporary credentials                                                                |
| `AWS_REGION`         | AWS region for Bedrock (default: `us-east-1`)                  | No | Used by Bedrock client                                                                                   |
| `AWS_ROLE_ARN`       | AWS role ARN to assume for Bedrock                             | No | If set, the Bedrock client will call STS AssumeRole                                                     |
| `AZURE_OPENAI_API_KEY` | Azure OpenAI API key                    | No | Required only if you want to use Azure OpenAI models                                                       |
| `AZURE_OPENAI_ENDPOINT` | Azure OpenAI endpoint                    | No | Required only if you want to use Azure OpenAI models                                                       |
| `AZURE_OPENAI_VERSION` | Azure OpenAI version                     | No | Required only if you want to use Azure OpenAI models                                                       |
| `OLLAMA_HOST`        | Ollama Host (default: http://localhost:11434)                | No | Required only if you want to use external Ollama server                                                  |
| `DEEPWIKI_EMBEDDER_TYPE` | Embedder type: `openai`, `google`, `ollama`, or `bedrock` (default: `openai`) | No | Controls which embedding provider to use                                                              |
| `PORT`               | Port for the API server (default: 8001)                      | No | If you host the API and frontend on the same machine, make sure to change the port in `SERVER_BASE_URL` accordingly |
| `SERVER_BASE_URL`    | Base URL for the API server (default: http://localhost:8001) | No | |
| `DEEPWIKI_AUTH_MODE` | Set to `true` or `1` to enable authorization mode. | No | Defaults to `false`. If enabled, `DEEPWIKI_AUTH_CODE` is required. |
| `DEEPWIKI_AUTH_CODE` | The secret code required for wiki generation when `DEEPWIKI_AUTH_MODE` is enabled. | No | Only used if `DEEPWIKI_AUTH_MODE` is `true` or `1`. |

**API Key Requirements:**
- If using `DEEPWIKI_EMBEDDER_TYPE=openai` (default): `OPENAI_API_KEY` is required
- If using `DEEPWIKI_EMBEDDER_TYPE=google`: `GOOGLE_API_KEY` is required  
- If using `DEEPWIKI_EMBEDDER_TYPE=ollama`: No API key required (local processing)
- If using `DEEPWIKI_EMBEDDER_TYPE=bedrock`: AWS credentials (or role-based credentials) are required

Other API keys are only required when configuring and using models from the corresponding providers.

## Authorization Mode

DeepWiki can be configured to run in an authorization mode, where wiki generation requires a valid authorization code. This is useful if you want to control who can use the generation feature.
Note that this mode restricts frontend initiation and protects cache deletion, but it doesn't fully prevent backend generation if the API endpoints are hit directly.

To enable authorization mode, set the following environment variables:

- `DEEPWIKI_AUTH_MODE`: Set this to `true` or `1`. When enabled, the frontend will display an input field for the authorization code.
- `DEEPWIKI_AUTH_CODE`: Set this to the desired secret code. Restricts frontend initiation and protects cache deletion, but doesn't fully prevent backend generation if API endpoints are hit directly.

If `DEEPWIKI_AUTH_MODE` is not set or is set to `false` (or any value other than `true`/`1`), the authorization feature will be disabled, and no code will be required.

### Docker Setup

You can use Docker to run DeepWiki:

#### Running the Container

```bash
# Pull the image from GitHub Container Registry
docker pull ghcr.io/asyncfuncai/deepwiki-open:latest

# Run the container with environment variables
docker run -p 8001:8001 -p 3000:3000 \
  -e GOOGLE_API_KEY=your_google_api_key \
  -e OPENAI_API_KEY=your_openai_api_key \
  -e OPENROUTER_API_KEY=your_openrouter_api_key \
  -e OLLAMA_HOST=your_ollama_host \
  -e AZURE_OPENAI_API_KEY=your_azure_openai_api_key \
  -e AZURE_OPENAI_ENDPOINT=your_azure_openai_endpoint \
  -e AZURE_OPENAI_VERSION=your_azure_openai_version \
  -v ~/.adalflow:/root/.adalflow \
  ghcr.io/asyncfuncai/deepwiki-open:latest
```

This command also mounts `~/.adalflow` on your host to `/root/.adalflow` in the container. This path is used to store:
- Cloned repositories (`~/.adalflow/repos/`)
- Their embeddings and indexes (`~/.adalflow/databases/`)
- Cached generated wiki content (`~/.adalflow/wikicache/`)

This ensures that your data persists even if the container is stopped or removed.

Or use the provided `docker-compose.yml` file:

```bash
# Edit the .env file with your API keys first
docker-compose up
```

(The `docker-compose.yml` file is pre-configured to mount `~/.adalflow` for data persistence, similar to the `docker run` command above.)

#### Using a .env file with Docker

You can also mount a .env file to the container:

```bash
# Create a .env file with your API keys
echo "GOOGLE_API_KEY=your_google_api_key" > .env
echo "OPENAI_API_KEY=your_openai_api_key" >> .env
echo "OPENROUTER_API_KEY=your_openrouter_api_key" >> .env
echo "AZURE_OPENAI_API_KEY=your_azure_openai_api_key" >> .env
echo "AZURE_OPENAI_ENDPOINT=your_azure_openai_endpoint" >> .env
echo "AZURE_OPENAI_VERSION=your_azure_openai_version" >> .env
echo "OLLAMA_HOST=your_ollama_host" >> .env

# Run the container with the .env file mounted
docker run -p 8001:8001 -p 3000:3000 \
  -v $(pwd)/.env:/app/.env \
  -v ~/.adalflow:/root/.adalflow \
  ghcr.io/asyncfuncai/deepwiki-open:latest
```

This command also mounts `~/.adalflow` on your host to `/root/.adalflow` in the container. This path is used to store:
- Cloned repositories (`~/.adalflow/repos/`)
- Their embeddings and indexes (`~/.adalflow/databases/`)
- Cached generated wiki content (`~/.adalflow/wikicache/`)

This ensures that your data persists even if the container is stopped or removed.

#### Building the Docker image locally

If you want to build the Docker image locally:

```bash
# Clone the repository
git clone https://github.com/AsyncFuncAI/deepwiki-open.git
cd deepwiki-open

# Build the Docker image
docker build -t deepwiki-open .

# Run the container
docker run -p 8001:8001 -p 3000:3000 \
  -e GOOGLE_API_KEY=your_google_api_key \
  -e OPENAI_API_KEY=your_openai_api_key \
  -e OPENROUTER_API_KEY=your_openrouter_api_key \
  -e AZURE_OPENAI_API_KEY=your_azure_openai_api_key \
  -e AZURE_OPENAI_ENDPOINT=your_azure_openai_endpoint \
  -e AZURE_OPENAI_VERSION=your_azure_openai_version \
  -e OLLAMA_HOST=your_ollama_host \
  deepwiki-open
```

#### Using Self-Signed Certificates in Docker

If you're in an environment that uses self-signed certificates, you can include them in the Docker build:

1. Create a directory for your certificates (default is `certs` in your project root)
2. Copy your `.crt` or `.pem` certificate files into this directory
3. Build the Docker image:

```bash
# Build with default certificates directory (certs)
docker build .

# Or build with a custom certificates directory
docker build --build-arg CUSTOM_CERT_DIR=my-custom-certs .
```

### API Server Details

The API server provides:
- Repository cloning and indexing
- RAG (Retrieval Augmented Generation)
- Streaming chat completions

For more details, see the [API README](./api/README.md).

## 🔌 OpenRouter Integration

DeepWiki now supports [OpenRouter](https://openrouter.ai/) as a model provider, giving you access to hundreds of AI models through a single API:

- **Multiple Model Options**: Access models from OpenAI, Anthropic, Google, Meta, Mistral, and more
- **Simple Configuration**: Just add your OpenRouter API key and select the model you want to use
- **Cost Efficiency**: Choose models that fit your budget and performance needs
- **Easy Switching**: Toggle between different models without changing your code

### How to Use OpenRouter with DeepWiki

1. **Get an API Key**: Sign up at [OpenRouter](https://openrouter.ai/) and get your API key
2. **Add to Environment**: Add `OPENROUTER_API_KEY=your_key` to your `.env` file
3. **Enable in UI**: Check the "Use OpenRouter API" option on the homepage
4. **Select Model**: Choose from popular models like GPT-4o, Claude 3.5 Sonnet, Gemini 2.0, and more

OpenRouter is particularly useful if you want to:
- Try different models without signing up for multiple services
- Access models that might be restricted in your region
- Compare performance across different model providers
- Optimize for cost vs. performance based on your needs

## 🤖 Ask & DeepResearch Features

### Ask Feature

The Ask feature allows you to chat with your repository using Retrieval Augmented Generation (RAG):

- **Context-Aware Responses**: Get accurate answers based on the actual code in your repository
- **RAG-Powered**: The system retrieves relevant code snippets to provide grounded responses
- **Real-Time Streaming**: See responses as they're generated for a more interactive experience
- **Conversation History**: The system maintains context between questions for more coherent interactions

### DeepResearch Feature

DeepResearch takes repository analysis to the next level with a multi-turn research process:

- **In-Depth Investigation**: Thoroughly explores complex topics through multiple research iterations
- **Structured Process**: Follows a clear research plan with updates and a comprehensive conclusion
- **Automatic Continuation**: The AI automatically continues research until reaching a conclusion (up to 5 iterations)
- **Research Stages**:
  1. **Research Plan**: Outlines the approach and initial findings
  2. **Research Updates**: Builds on previous iterations with new insights
  3. **Final Conclusion**: Provides a comprehensive answer based on all iterations

To use DeepResearch, simply toggle the "Deep Research" switch in the Ask interface before submitting your question.

## Screenshots

![DeepWiki Main Interface](screenshots/Interface.png)
*The main interface of DeepWiki*

![Private Repository Support](screenshots/privaterepo.png)
*Access private repositories with personal access tokens*

![DeepResearch Feature](screenshots/DeepResearch.png)
*DeepResearch conducts multi-turn investigations for complex topics*

### Demo Video

[![DeepWiki Demo Video](https://img.youtube.com/vi/zGANs8US8B4/0.jpg)](https://youtu.be/zGANs8US8B4)

*Watch DeepWiki in action!*

## ❓ Troubleshooting

### API Key Issues
- **"Missing environment variables"**: Make sure your `.env` file is in the project root and contains the required API keys
- **"API key not valid"**: Check that you've copied the full key correctly with no extra spaces
- **"OpenRouter API error"**: Verify your OpenRouter API key is valid and has sufficient credits
- **"Azure OpenAI API error"**: Verify your Azure OpenAI credentials (API key, endpoint, and version) are correct and the service is properly deployed

### Connection Problems
- **"Cannot connect to API server"**: Make sure the API server is running on port 8001
- **"CORS error"**: The API is configured to allow all origins, but if you're having issues, try running both frontend and backend on the same machine

### Generation Issues
- **"Error generating wiki"**: For very large repositories, try a smaller one first
- **"Invalid repository format"**: Make sure you're using a valid GitHub, GitLab or Bitbucket URL format
- **"Could not fetch repository structure"**: For private repositories, ensure you've entered a valid personal access token with appropriate permissions
- **"Diagram rendering error"**: The app will automatically try to fix broken diagrams

### Common Solutions
1. **Restart both servers**: A simple restart often fixes most issues
2. **Check console logs**: Open browser developer tools to see any JavaScript errors
3. **Check API logs**: Look at the terminal where the API is running for Python errors

## 🤝 Contributing

Contributions are welcome! Feel free to:
- Open issues for bugs or feature requests
- Submit pull requests to improve the code
- Share your feedback and ideas

## 📄 License

This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.

## ⭐ Star History

[![Star History Chart](https://api.star-history.com/svg?repos=AsyncFuncAI/deepwiki-open&type=Date)](https://star-history.com/#AsyncFuncAI/deepwiki-open&Date)


================================================
FILE: README.pt-br.md
================================================
# DeepWiki-Open

![DeepWiki Banner](screenshots/Deepwiki.png)

**DeepWiki** é minha própria tentativa de implementação do DeepWiki, que cria automaticamente wikis bonitas e interativas para qualquer repositório GitHub, GitLab ou BitBucket! Basta inserir o nome de um repositório, e o DeepWiki irá:

1. Analisar a estrutura do código
2. Gerar documentação abrangente
3. Criar diagramas visuais para explicar como tudo funciona
4. Organizar tudo em uma wiki fácil de navegar

[!["Buy Me A Coffee"](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)](https://buymeacoffee.com/sheing)
[![Tip in Crypto](https://tip.md/badge.svg)](https://tip.md/sng-asyncfunc)
[![Twitter/X](https://img.shields.io/badge/Twitter-1DA1F2?style=for-the-badge&logo=twitter&logoColor=white)](https://x.com/sashimikun_void)
[![Discord](https://img.shields.io/badge/Discord-7289DA?style=for-the-badge&logo=discord&logoColor=white)](https://discord.com/invite/VQMBGR8u5v)

[English](./README.md) | [简体中文](./README.zh.md) | [繁體中文](./README.zh-tw.md) | [日本語](./README.ja.md) | [Español](./README.es.md) | [한국어](./README.kr.md) | [Tiếng Việt](./README.vi.md) | [Português Brasileiro](./README.pt-br.md) | [Français](./README.fr.md) | [Русский](./README.ru.md)

## ✨ Recursos

- **Documentação Instantânea**: Transforme qualquer repositório GitHub, GitLab ou BitBucket em uma wiki em segundos
- **Suporte a Repositórios Privados**: Acesse repositórios privados com segurança usando tokens de acesso pessoal
- **Análise Inteligente**: Compreensão da estrutura e relacionamentos do código com IA
- **Diagramas Bonitos**: Diagramas Mermaid automáticos para visualizar arquitetura e fluxo de dados
- **Navegação Fácil**: Interface simples e intuitiva para explorar a wiki
- **Recurso de Perguntas**: Converse com seu repositório usando IA com RAG para obter respostas precisas
- **DeepResearch**: Processo de pesquisa em várias etapas que investiga minuciosamente tópicos complexos
- **Múltiplos Provedores de Modelos**: Suporte para Google Gemini, OpenAI, OpenRouter e modelos locais Ollama

## 🚀 Início Rápido (Super Fácil!)

### Opção 1: Usando Docker

```bash
# Clone o repositório
git clone https://github.com/AsyncFuncAI/deepwiki-open.git
cd deepwiki-open

# Crie um arquivo .env com suas chaves de API
echo "GOOGLE_API_KEY=sua_chave_api_google" > .env
echo "OPENAI_API_KEY=sua_chave_api_openai" >> .env
# Opcional: Adicione a chave API OpenRouter se quiser usar modelos OpenRouter
echo "OPENROUTER_API_KEY=sua_chave_api_openrouter" >> .env
# Opcional: Adicione o host Ollama se não for local. padrão: http://localhost:11434
echo "OLLAMA_HOST=seu_host_ollama" >> .env

# Execute com Docker Compose
docker-compose up
```

Para instruções detalhadas sobre como usar o DeepWiki com Ollama e Docker, veja [Instruções do Ollama (em inglês)](Ollama-instruction.md).

> 💡 **Onde obter essas chaves:**
> - Obtenha uma chave API Google no [Google AI Studio](https://makersuite.google.com/app/apikey)
> - Obtenha uma chave API OpenAI na [Plataforma OpenAI](https://platform.openai.com/api-keys)

### Opção 2: Configuração Manual (Recomendada)

#### Passo 1: Configure Suas Chaves API

Crie um arquivo `.env` na raiz do projeto com estas chaves:

```
GOOGLE_API_KEY=sua_chave_api_google
OPENAI_API_KEY=sua_chave_api_openai
# Opcional: Adicione isso se quiser usar modelos OpenRouter
OPENROUTER_API_KEY=sua_chave_api_openrouter
# Opcional: Adicione o host Ollama se não for local. padrão: http://localhost:11434
OLLAMA_HOST=seu_host_ollama
```

#### Passo 2: Inicie o Backend

```bash
# Instale as dependências Python
python -m pip install poetry==2.0.1 && poetry install -C api

# Inicie o servidor API
python -m api.main
```

#### Passo 3: Inicie o Frontend

```bash
# Instale as dependências JavaScript
npm install
# ou
yarn install

# Inicie o aplicativo web
npm run dev
# ou
yarn dev
```

#### Passo 4: Use o DeepWiki!

1. Abra [http://localhost:3000](http://localhost:3000) no seu navegador
2. Insira um repositório GitHub, GitLab ou Bitbucket (como `https://github.com/openai/codex`, `https://github.com/microsoft/autogen`, `https://gitlab.com/gitlab-org/gitlab`, ou `https://bitbucket.org/redradish/atlassian_app_versions`)
3. Para repositórios privados, clique em "+ Adicionar tokens de acesso" e insira seu token de acesso pessoal do GitHub ou GitLab
4. Clique em "Gerar Wiki" e veja a mágica acontecer!

## 🔍 Como Funciona

O DeepWiki usa IA para:

1. Clonar e analisar o repositório GitHub, GitLab ou Bitbucket (incluindo repositórios privados com autenticação por token)
2. Criar embeddings do código para recuperação inteligente
3. Gerar documentação com IA contextual (usando modelos Google Gemini, OpenAI, OpenRouter ou Ollama local)
4. Criar diagramas visuais para explicar relações de código
5. Organizar tudo em uma wiki estruturada
6. Permitir perguntas e respostas inteligentes com o repositório através do recurso de Perguntas
7. Fornecer capacidades de pesquisa aprofundada com DeepResearch

```mermaid
graph TD
    A[Usuário insere repo GitHub/GitLab/Bitbucket] --> AA{Repo privado?}
    AA -->|Sim| AB[Adicionar token de acesso]
    AA -->|Não| B[Clonar Repositório]
    AB --> B
    B --> C[Analisar Estrutura do Código]
    C --> D[Criar Embeddings do Código]

    D --> M{Selecionar Provedor de Modelo}
    M -->|Google Gemini| E1[Gerar com Gemini]
    M -->|OpenAI| E2[Gerar com OpenAI]
    M -->|OpenRouter| E3[Gerar com OpenRouter]
    M -->|Ollama Local| E4[Gerar com Ollama]

    E1 --> E[Gerar Documentação]
    E2 --> E
    E3 --> E
    E4 --> E

    D --> F[Criar Diagramas Visuais]
    E --> G[Organizar como Wiki]
    F --> G
    G --> H[DeepWiki Interativo]

    classDef process stroke-width:2px;
    classDef data stroke-width:2px;
    classDef result stroke-width:2px;
    classDef decision stroke-width:2px;

    class A,D data;
    class AA,M decision;
    class B,C,E,F,G,AB,E1,E2,E3,E4 process;
    class H result;
```

## 🛠️ Estrutura do Projeto

```
deepwiki/
├── api/                  # Servidor API backend
│   ├── main.py           # Ponto de entrada da API
│   ├── api.py            # Implementação FastAPI
│   ├── rag.py            # Retrieval Augmented Generation
│   ├── data_pipeline.py  # Utilitários de processamento de dados
│   └── pyproject.toml    # Dependências Python (Poetry)
│
├── src/                  # Aplicativo Next.js frontend
│   ├── app/              # Diretório do aplicativo Next.js
│   │   └── page.tsx      # Página principal do aplicativo
│   └── components/       # Componentes React
│       └── Mermaid.tsx   # Renderizador de diagramas Mermaid
│
├── public/               # Ativos estáticos
├── package.json          # Dependências JavaScript
└── .env                  # Variáveis de ambiente (crie este arquivo)
```

## 🤖 Sistema de Seleção de Modelos Baseado em Provedores

O DeepWiki agora implementa um sistema flexível de seleção de modelos baseado em provedores, suportando múltiplos provedores de LLM:

### Provedores e Modelos Suportados

- **Google**: Padrão `gemini-2.5-flash`, também suporta `gemini-2.5-flash-lite`, `gemini-2.5-pro`, etc.
- **OpenAI**: Padrão `gpt-5-nano`, também suporta `gpt-5`, `4o`, etc.
- **OpenRouter**: Acesso a múltiplos modelos via uma API unificada, incluindo Claude, Llama, Mistral, etc.
- **Ollama**: Suporte para modelos de código aberto executados localmente como `llama3`

### Variáveis de Ambiente

Cada provedor requer suas variáveis de ambiente de chave API correspondentes:

```
# Chaves API
GOOGLE_API_KEY=sua_chave_api_google        # Necessária para modelos Google Gemini
OPENAI_API_KEY=sua_chave_api_openai        # Necessária para modelos OpenAI
OPENROUTER_API_KEY=sua_chave_api_openrouter # Necessária para modelos OpenRouter

# Configuração de URL Base da API OpenAI
OPENAI_BASE_URL=https://endpoint-api-personalizado.com/v1  # Opcional, para endpoints de API OpenAI personalizados

# Host Ollama
OLLAMA_HOST=seu_host_ollama # Opcional, se Ollama não for local. padrão: http://localhost:11434

# Diretório de Configuração
DEEPWIKI_CONFIG_DIR=/caminho/para/dir/config/personalizado  # Opcional, para localização personalizada de arquivos de configuração
```

### Arquivos de Configuração

O DeepWiki usa arquivos de configuração JSON para gerenciar vários aspectos do sistema:

1. **`generator.json`**: Configuração para modelos de geração de texto
   - Define provedores de modelos disponíveis (Google, OpenAI, OpenRouter, Ollama)
   - Especifica modelos padrão e disponíveis para cada provedor
   - Contém parâmetros específicos de modelo como temperatura e top_p

2. **`embedder.json`**: Configuração para modelos de embedding e processamento de texto
   - Define modelos de embedding para armazenamento de vetores
   - Contém configuração do recuperador para RAG
   - Especifica configurações do divisor de texto para divisão de documentos

3. **`repo.json`**: Configuração para manipulação de repositórios
   - Contém filtros de arquivos para excluir certos arquivos e diretórios
   - Define limites de tamanho de repositório e regras de processamento

Por padrão, esses arquivos estão localizados no diretório `api/config/`. Você pode personalizar sua localização usando a variável de ambiente `DEEPWIKI_CONFIG_DIR`.

### Seleção de Modelo Personalizado para Provedores de Serviço

O recurso de seleção de modelo personalizado é especificamente projetado para provedores de serviço que precisam:

- Oferecer múltiplas opções de modelo de IA para usuários dentro de sua organização
- Adaptar-se rapidamente ao panorama de LLM em rápida evolução sem mudanças de código
- Suportar modelos especializados ou ajustados que não estão na lista predefinida

Provedores de serviço podem implementar suas ofertas de modelo selecionando entre as opções predefinidas ou inserindo identificadores de modelo personalizados na interface do frontend.

### Configuração de URL Base para Canais Privados Empresariais

A configuração base_url do Cliente OpenAI é projetada principalmente para usuários empresariais com canais de API privados. Este recurso:

- Permite conexão a endpoints de API privados ou específicos da empresa
- Permite que organizações usem seus próprios serviços LLM auto-hospedados ou implantados personalizados
- Suporta integração com serviços compatíveis com a API OpenAI de terceiros

**Em Breve**: Em atualizações futuras, o DeepWiki suportará um modo onde os usuários precisam fornecer suas próprias chaves API nas solicitações. Isso permitirá que clientes empresariais com canais privados usem seus arranjos de API existentes sem compartilhar credenciais com a implantação do DeepWiki.

## 🤩 Usando Modelos de Embedding Compatíveis com OpenAI (ex., Alibaba Qwen)

Se você deseja usar modelos de embedding compatíveis com a API OpenAI (como Alibaba Qwen), siga estas etapas:

1. Substitua o conteúdo de `api/config/embedder.json` pelo de `api/config/embedder.openai_compatible.json.bak`.
2. No arquivo `.env` da raiz do seu projeto, defina as variáveis de ambiente relevantes, por exemplo:
   ```
   OPENAI_API_KEY=sua_chave_api
   OPENAI_BASE_URL=seu_endpoint_compativel_openai
   ```
3. O programa substituirá automaticamente os espaços reservados em embedder.json pelos valores de suas variáveis de ambiente.

Isso permite que você mude perfeitamente para qualquer serviço de embedding compatível com OpenAI sem mudanças de código.

### Logging

O DeepWiki usa o módulo `logging` integrado do Python para saída de diagnóstico. Você pode configurar a verbosidade e o destino do arquivo de log via variáveis de ambiente:

| Variável        | Descrição                                                        | Padrão                      |
|-----------------|--------------------------------------------------------------------|------------------------------|
| `LOG_LEVEL`     | Nível de logging (DEBUG, INFO, WARNING, ERROR, CRITICAL).          | INFO                         |
| `LOG_FILE_PATH` | Caminho para o arquivo de log. Se definido, logs serão escritos neste arquivo. | `api/logs/application.log`   |

Para habilitar logging de depuração e direcionar logs para um arquivo personalizado:
```bash
export LOG_LEVEL=DEBUG
export LOG_FILE_PATH=./debug.log
python -m api.main
```
Ou com Docker Compose:
```bash
LOG_LEVEL=DEBUG LOG_FILE_PATH=./debug.log docker-compose up
```

Ao executar com Docker Compose, o diretório `api/logs` do container é montado em `./api/logs` no seu host (veja a seção `volumes` em `docker-compose.yml`), garantindo que os arquivos de log persistam entre reinicializações.

Alternativamente, você pode armazenar essas configurações no seu arquivo `.env`:

```bash
LOG_LEVEL=DEBUG
LOG_FILE_PATH=./debug.log
```
Então simplesmente execute:

```bash
docker-compose up
```

**Considerações de Segurança do Caminho de Logging:** Em ambientes de produção, garanta que o diretório `api/logs` e qualquer caminho de arquivo de log personalizado estejam protegidos com permissões de sistema de arquivos e controles de acesso apropriados. O aplicativo impõe que `LOG_FILE_PATH` resida dentro do diretório `api/logs` do projeto para evitar travessia de caminho ou escritas não autorizadas.

## 🔧 Configuração Avançada

### Variáveis de Ambiente

| Variável             | Descrição                                                  | Obrigatória | Observação                                                                                                     |
|----------------------|--------------------------------------------------------------|----------|----------------------------------------------------------------------------------------------------------|
| `GOOGLE_API_KEY`     | Chave API Google Gemini para geração com IA                      | Não | Necessária apenas se você quiser usar modelos Google Gemini                                                    |
| `OPENAI_API_KEY`     | Chave API OpenAI para embeddings                                | Sim | Nota: Isso é necessário mesmo se você não estiver usando modelos OpenAI, pois é usado para embeddings.              |
| `OPENROUTER_API_KEY` | Chave API OpenRouter para modelos alternativos                    | Não | Necessária apenas se você quiser usar modelos OpenRouter                                                       |
| `OLLAMA_HOST`        | Host Ollama (padrão: http://localhost:11434)                | Não | Necessária apenas se você quiser usar servidor Ollama externo                                                  |
| `PORT`               | Porta para o servidor API (padrão: 8001)                      | Não | Se você hospedar API e frontend na mesma máquina, certifique-se de alterar a porta de `SERVER_BASE_URL` de acordo |
| `SERVER_BASE_URL`    | URL base para o servidor API (padrão: http://localhost:8001) | Não | |
| `DEEPWIKI_AUTH_MODE` | Defina como `true` ou `1` para habilitar o modo de autorização. | Não | Padrão é `false`. Se habilitado, `DEEPWIKI_AUTH_CODE` é necessário. |
| `DEEPWIKI_AUTH_CODE` | O código secreto necessário para geração de wiki quando `DEEPWIKI_AUTH_MODE` está habilitado. | Não | Usado apenas se `DEEPWIKI_AUTH_MODE` for `true` ou `1`. |

Se você não estiver usando o modo ollama, você precisa configurar uma chave API OpenAI para embeddings. Outras chaves API são necessárias apenas ao configurar e usar modelos dos provedores correspondentes.

## Modo de Autorização

O DeepWiki pode ser configurado para executar em um modo de autorização, onde a geração de wiki requer um código de autorização válido. Isso é útil se você quiser controlar quem pode usar o recurso de geração.
Restringe a iniciação do frontend e protege a exclusão de cache, mas não impede completamente a geração de backend se os endpoints da API forem acessados diretamente.

Para habilitar o modo de autorização, defina as seguintes variáveis de ambiente:

- `DEEPWIKI_AUTH_MODE`: Defina como `true` ou `1`. Quando habilitado, o frontend exibirá um campo de entrada para o código de autorização.
- `DEEPWIKI_AUTH_CODE`: Defina como o código secreto desejado. Restringe a iniciação do frontend e protege a exclusão de cache, mas não impede completamente a geração de backend se os endpoints da API forem acessados diretamente.

Se `DEEPWIKI_AUTH_MODE` não estiver definido ou estiver definido como `false` (ou qualquer outro valor diferente de `true`/`1`), o recurso de autorização será desativado, e nenhum código será necessário.

### Configuração Docker

Você pode usar Docker para executar o DeepWiki:

```bash
# Baixe a imagem do GitHub Container Registry
docker pull ghcr.io/asyncfuncai/deepwiki-open:latest

# Execute o container com variáveis de ambiente
docker run -p 8001:8001 -p 3000:3000 \
  -e GOOGLE_API_KEY=sua_chave_api_google \
  -e OPENAI_API_KEY=sua_chave_api_openai \
  -e OPENROUTER_API_KEY=sua_chave_api_openrouter \
  -e OLLAMA_HOST=seu_host_ollama \
  -v ~/.adalflow:/root/.adalflow \
  ghcr.io/asyncfuncai/deepwiki-open:latest
```

Este comando também monta `~/.adalflow` no seu host para `/root/.adalflow` no container. Este caminho é usado para armazenar:
- Repositórios clonados (`~/.adalflow/repos/`)
- Seus embeddings e índices (`~/.adalflow/databases/`)
- Conteúdo de wiki gerado em cache (`~/.adalflow/wikicache/`)

Isso garante que seus dados persistam mesmo se o container for parado ou removido.

Ou use o arquivo `docker-compose.yml` fornecido:

```bash
# Edite o arquivo .env com suas chaves API primeiro
docker-compose up
```

(O arquivo `docker-compose.yml` é pré-configurado para montar `~/.adalflow` para persistência de dados, similar ao comando `docker run` acima.)

#### Usando um arquivo .env com Docker

Você também pode montar um arquivo .env no container:

```bash
# Crie um arquivo .env com suas chaves API
echo "GOOGLE_API_KEY=sua_chave_api_google" > .env
echo "OPENAI_API_KEY=sua_chave_api_openai" >> .env
echo "OPENROUTER_API_KEY=sua_chave_api_openrouter" >> .env
echo "OLLAMA_HOST=seu_host_ollama" >> .env

# Execute o container com o arquivo .env montado
docker run -p 8001:8001 -p 3000:3000 \
  -v $(pwd)/.env:/app/.env \
  -v ~/.adalflow:/root/.adalflow \
  ghcr.io/asyncfuncai/deepwiki-open:latest
```

Este comando também monta `~/.adalflow` no seu host para `/root/.adalflow` no container. Este caminho é usado para armazenar:
- Repositórios clonados (`~/.adalflow/repos/`)
- Seus embeddings e índices (`~/.adalflow/databases/`)
- Conteúdo de wiki gerado em cache (`~/.adalflow/wikicache/`)

Isso garante que seus dados persistam mesmo se o container for parado ou removido.

#### Construindo a imagem Docker localmente

Se você quiser construir a imagem Docker localmente:

```bash
# Clone o repositório
git clone https://github.com/AsyncFuncAI/deepwiki-open.git
cd deepwiki-open

# Construa a imagem Docker
docker build -t deepwiki-open .

# Execute o container
docker run -p 8001:8001 -p 3000:3000 \
  -e GOOGLE_API_KEY=sua_chave_api_google \
  -e OPENAI_API_KEY=sua_chave_api_openai \
  -e OPENROUTER_API_KEY=sua_chave_api_openrouter \
  -e OLLAMA_HOST=seu_host_ollama \
  deepwiki-open
```

### Detalhes do Servidor API

O servidor API fornece:
- Clonagem e indexação de repositórios
- RAG (Retrieval Augmented Generation)
- Completions de chat com streaming

Para mais detalhes, veja o [README da API](./api/README.md).

## 🔌 Integração com OpenRouter

O DeepWiki agora suporta [OpenRouter](https://openrouter.ai/) como provedor de modelos, dando acesso a centenas de modelos de IA através de uma única API:

- **Múltiplas Opções de Modelos**: Acesse modelos da OpenAI, Anthropic, Google, Meta, Mistral e mais
- **Configuração Simples**: Apenas adicione sua chave API OpenRouter e selecione o modelo que deseja usar
- **Eficiência de Custo**: Escolha modelos que se adequem ao seu orçamento e necessidades de desempenho
- **Troca Fácil**: Alterne entre diferentes modelos sem alterar seu código

### Como Usar o OpenRouter com DeepWiki

1. **Obtenha uma Chave API**: Cadastre-se no [OpenRouter](https://openrouter.ai/) e obtenha sua chave API
2. **Adicione ao Ambiente**: Adicione `OPENROUTER_API_KEY=sua_chave` ao seu arquivo `.env`
3. **Habilite na UI**: Marque a opção "Usar API OpenRouter" na página inicial
4. **Selecione o Modelo**: Escolha entre modelos populares como GPT-4o, Claude 3.5 Sonnet, Gemini 2.0 e mais

O OpenRouter é particularmente útil se você quiser:
- Experimentar diferentes modelos sem se cadastrar em múltiplos serviços
- Acessar modelos que podem estar restritos em sua região
- Comparar desempenho entre diferentes provedores de modelos
- Otimizar custo vs. desempenho com base em suas necessidades

## 🤖 Recursos de Perguntas & DeepResearch

### Recurso de Perguntas

O recurso de Perguntas permite que você converse com seu repositório usando Retrieval Augmented Generation (RAG):

- **Respostas Contextuais**: Obtenha respostas precisas baseadas no código real em seu repositório
- **Alimentado por RAG**: O sistema recupera trechos de código relevantes para fornecer respostas fundamentadas
- **Streaming em Tempo Real**: Veja as respostas conforme são geradas para uma experiência mais interativa
- **Histórico de Conversação**: O sistema mantém contexto entre perguntas para interações mais coerentes

### Recurso DeepResearch

O DeepResearch leva a análise de repositórios a um novo nível com um processo de pesquisa em várias etapas:

- **Investigação Aprofundada**: Explora minuciosamente tópicos complexos através de múltiplas iterações de pesquisa
- **Processo Estruturado**: Segue um plano de pesquisa claro com atualizações e uma conclusão abrangente
- **Continuação Automática**: A IA continua automaticamente a pesquisa até chegar a uma conclusão (até 5 iterações)
- **Estágios de Pesquisa**:
  1. **Plano de Pesquisa**: Descreve a abordagem e descobertas iniciais
  2. **Atualizações de Pesquisa**: Construído sobre iterações anteriores com novos insights
  3. **Conclusão Final**: Fornece uma resposta abrangente baseada em todas as iterações

Para usar o DeepResearch, simplesmente alterne o interruptor "Pesquisa Aprofundada" na interface de Perguntas antes de enviar sua pergunta.

## 📱 Capturas de Tela

![Interface Principal do DeepWiki](screenshots/Interface.png)
*A interface principal do DeepWiki*

![Suporte a Repositórios Privados](screenshots/privaterepo.png)
*Acesse repositórios privados com tokens de acesso pessoal*

![Recurso DeepResearch](screenshots/DeepResearch.png)
*DeepResearch conduz investigações em várias etapas para tópicos complexos*

### Vídeo de Demonstração

[![Vídeo de Demonstração do DeepWiki](https://img.youtube.com/vi/zGANs8US8B4/0.jpg)](https://youtu.be/zGANs8US8B4)

*Veja o DeepWiki em ação!*

## ❓ Solução de Problemas

### Problemas com Chaves API
- **"Variáveis de ambiente ausentes"**: Certifique-se de que seu arquivo `.env` está na raiz do projeto e contém as chaves API necessárias
- **"Chave API não válida"**: Verifique se você copiou a chave completa corretamente sem espaços extras
- **"Erro de API OpenRouter"**: Verifique se sua chave API OpenRouter é válida e tem créditos suficientes

### Problemas de Conexão
- **"Não é possível conectar ao servidor API"**: Certifique-se de que o servidor API está em execução na porta 8001
- **"Erro CORS"**: A API está configurada para permitir todas as origens, mas se você estiver tendo problemas, tente executar frontend e backend na mesma máquina

### Problemas de Geração
- **"Erro ao gerar wiki"**: Para repositórios muito grandes, tente um menor primeiro
- **"Formato de repositório inválido"**: Certifique-se de que está usando um formato de URL GitHub, GitLab ou Bitbucket válido
- **"Não foi possível buscar a estrutura do repositório"**: Para repositórios privados, certifique-se de ter inserido um token de acesso pessoal válido com as permissões apropriadas
- **"Erro de renderização de diagrama"**: O aplicativo tentará corrigir automaticamente diagramas quebrados

### Soluções Comuns
1. **Reinicie ambos os servidores**: Às vezes um simples reinício resolve a maioria dos problemas
2. **Verifique os logs do console**: Abra as ferramentas de desenvolvedor do navegador para ver quaisquer erros JavaScript
3. **Verifique os logs da API**: Olhe o terminal onde a API está em execução para erros Python

## 🤝 Contribuindo

Contribuições são bem-vindas! Sinta-se à vontade para:
- Abrir issues para bugs ou solicitações de recursos
- Enviar pull requests para melhorar o código
- Compartilhar seu feedback e ideias

## 📄 Licença

Este projeto está licenciado sob a Licença MIT - veja o arquivo [LICENSE](LICENSE) para detalhes.

## ⭐ Histórico de Estrelas

[![Gráfico de Histórico de Estrelas](https://api.star-history.com/svg?repos=AsyncFuncAI/deepwiki-open&type=Date)](https://star-history.com/#AsyncFuncAI/deepwiki-open&Date)


================================================
FILE: README.ru.md
================================================
# DeepWiki-Open

![Баннер DeepWiki](screenshots/Deepwiki.png)

**DeepWiki** — это моя собственная реализация DeepWiki, автоматически создающая красивые, интерактивные вики по любому репозиторию на GitHub, GitLab или BitBucket! Просто укажите название репозитория, и DeepWiki выполнит:

1. Анализ структуры кода
2. Генерацию полноценной документации
3. Построение визуальных диаграмм, объясняющих работу системы
4. Организацию всего в удобную и структурированную вики

[!["Купить мне кофе"](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)](https://buymeacoffee.com/sheing)  
[![Поддержать в криптовалюте](https://tip.md/badge.svg)](https://tip.md/sng-asyncfunc)  
[![Twitter/X](https://img.shields.io/badge/Twitter-1DA1F2?style=for-the-badge&logo=twitter&logoColor=white)](https://x.com/sashimikun_void)  
[![Discord](https://img.shields.io/badge/Discord-7289DA?style=for-the-badge&logo=discord&logoColor=white)](https://discord.com/invite/VQMBGR8u5v)

[English](./README.md) | [简体中文](./README.zh.md) | [繁體中文](./README.zh-tw.md) | [日本語](./README.ja.md) | [Español](./README.es.md) | [한국어](./README.kr.md) | [Tiếng Việt](./README.vi.md) | [Português Brasileiro](./README.pt-br.md) | [Français](./README.fr.md) | [Русский](./README.ru.md)

## ✨ Возможности

- **Мгновенная документация**: Превращение любого репозитория в вики за считанные секунды
- **Поддержка приватных репозиториев**: Безопасный доступ с помощью персональных токенов
- **Умный анализ**: Понимание структуры и взаимосвязей в коде с помощью ИИ
- **Красивые диаграммы**: Автоматическая генерация диаграмм Mermaid для отображения архитектуры и потоков данных
- **Простая навигация**: Интуитивный интерфейс для изучения вики
- **Функция “Спросить”**: Общение с репозиторием через ИИ, основанный на RAG, для получения точных ответов
- **DeepResearch**: Многошаговое исследование для глубокого анализа сложных тем
- **Поддержка различных провайдеров моделей**: Google Gemini, OpenAI, OpenRouter и локальные модели Ollama

## 🚀 Быстрый старт (максимально просто!)

### Вариант 1: С использованием Docker

```bash
# Клонируйте репозиторий
git clone https://github.com/AsyncFuncAI/deepwiki-open.git
cd deepwiki-open

# Создайте файл .env с вашими API-ключами
echo "GOOGLE_API_KEY=ваш_google_api_key" > .env
echo "OPENAI_API_KEY=ваш_openai_api_key" >> .env
# Необязательно: ключ OpenRouter
echo "OPENROUTER_API_KEY=ваш_openrouter_api_key" >> .env
# Необязательно: указать хост Ollama, если он не локальный (по умолчанию http://localhost:11434)
echo "OLLAMA_HOST=ваш_ollama_host" >> .env
# Необязательно: ключ и параметры Azure OpenAI
echo "AZURE_OPENAI_API_KEY=ваш_azure_api_key" >> .env
echo "AZURE_OPENAI_ENDPOINT=ваш_azure_endpoint" >> .env
echo "AZURE_OPENAI_VERSION=ваша_azure_version" >> .env
# Запуск через Docker Compose
docker-compose up
```

Подробную инструкцию по работе с Ollama и Docker см. в [Ollama Instructions](Ollama-instruction.md).

> 💡 **Где взять ключи API:**
> - [Google AI Studio](https://makersuite.google.com/app/apikey)
> - [OpenAI Platform](https://platform.openai.com/api-keys)
> - [Azure Portal](https://portal.azure.com/)

### Вариант 2: Ручная установка (рекомендуется)

#### Шаг 1: Установка ключей API

Создайте файл `.env` в корне проекта со следующим содержанием:

```
GOOGLE_API_KEY=ваш_google_api_key
OPENAI_API_KEY=ваш_openai_api_key
# Необязательно: для OpenRouter
OPENROUTER_API_KEY=ваш_openrouter_api_key
# Необязательно: для Azure OpenAI
AZURE_OPENAI_API_KEY=ваш_azure_api_key
AZURE_OPENAI_ENDPOINT=ваш_azure_endpoint
AZURE_OPENAI_VERSION=ваша_azure_version
# Необязательно: если Ollama не локальная
OLLAMA_HOST=ваш_ollama_host
```

#### Шаг 2: Запуск backend-сервера

```bash
# Установка зависимостей
python -m pip install poetry==2.0.1 && poetry install -C api

# Запуск API
python -m api.main
```

#### Шаг 3: Запуск frontend-интерфейса

```bash
# Установка JS-зависимостей
npm install
# или
yarn install

# Запуск веб-интерфейса
npm run dev
# или
yarn dev
```

#### Шаг 4: Используйте DeepWiki!

1. Откройте [http://localhost:3000](http://localhost:3000) в браузере
2. Введите URL репозитория (например, `https://github.com/openai/codex`)
3. Для приватных репозиториев нажмите “+ Add access tokens” и введите токен
4. Нажмите “Generate Wiki” и наблюдайте за магией!

## 🔍 Как это работает

DeepWiki использует искусственный интеллект, чтобы:

1. Клонировать и проанализировать репозиторий GitHub, GitLab или Bitbucket (включая приватные — с использованием токенов)
2. Создать эмбеддинги кода для интеллектуального поиска
3. Сгенерировать документацию с учетом контекста (с помощью Google Gemini, OpenAI, OpenRouter, Azure OpenAI или локальных моделей Ollama)
4. Построить визуальные диаграммы для отображения связей в коде
5. Организовать всё в структурированную вики
6. Включить интеллектуальное взаимодействие через функцию "Спросить"
7. Обеспечить углубленный анализ через DeepResearch

```mermaid
graph TD
    A[Пользователь вводит ссылку на репозиторий] --> AA{Приватный репозиторий?}
    AA -->|Да| AB[Добавить токен доступа]
    AA -->|Нет| B[Клонировать репозиторий]
    AB --> B
    B --> C[Анализ структуры кода]
    C --> D[Создание эмбеддингов]

    D --> M{Выбор провайдера модели}
    M -->|Google Gemini| E1[Генерация через Gemini]
    M -->|OpenAI| E2[Генерация через OpenAI]
    M -->|OpenRouter| E3[Генерация через OpenRouter]
    M -->|Локальная Ollama| E4[Генерация через Ollama]
    M -->|Azure| E5[Генерация через Azure]

    E1 --> E[Создание документации]
    E2 --> E
    E3 --> E
    E4 --> E
    E5 --> E

    D --> F[Создание диаграмм]
    E --> G[Формирование вики]
    F --> G
    G --> H[Интерактивная DeepWiki]

    classDef process stroke-width:2px;
    classDef data stroke-width:2px;
    classDef result stroke-width:2px;
    classDef decision stroke-width:2px;

    class A,D data;
    class AA,M decision;
    class B,C,E,F,G,AB,E1,E2,E3,E4,E5 process;
    class H result;
```

## 🛠️ Структура проекта

```
deepwiki/
├── api/                  # Backend API сервер
│   ├── main.py           # Точка входа API
│   ├── api.py            # Реализация через FastAPI
│   ├── rag.py            # RAG: генерация с дополнением
│   ├── data_pipeline.py  # Утилиты обработки данных
│   └── requirements.txt  # Зависимости Python
│
├── src/                  # Клиентское приложение на Next.js
│   ├── app/              # Каталог приложения Next.js
│   │   └── page.tsx      # Главная страница приложения
│   └── components/       # React-компоненты
│       └── Mermaid.tsx   # Рендеринг диаграмм Mermaid
│
├── public/               # Статические ресурсы
├── package.json          # JS-зависимости
└── .env                  # Переменные окружения
```

## 🤖 Система выбора моделей по провайдерам

DeepWiki поддерживает гибкую систему выбора моделей от разных поставщиков:

### Поддерживаемые провайдеры и модели

- **Google**: По умолчанию `gemini-2.5-flash`, также доступны `gemini-2.5-flash-lite`, `gemini-2.5-pro`, и др.
- **OpenAI**: По умолчанию `gpt-5-nano`, также поддерживает `gpt-5`, `4o` и другие
- **OpenRouter**: Доступ к множеству моделей через единый API (Claude, Llama, Mistral и др.)
- **Azure OpenAI**: По умолчанию `gpt-4o`, поддерживаются и другие
- **Ollama**: Локальные open-source модели, например `llama3`

### Переменные окружения

Каждому провайдеру соответствуют свои ключи:

```bash
GOOGLE_API_KEY=...         # Для моделей Google Gemini
OPENAI_API_KEY=...         # Для моделей OpenAI
OPENROUTER_API_KEY=...     # Для моделей OpenRouter
AZURE_OPENAI_API_KEY=...   # Для моделей Azure
AZURE_OPENAI_ENDPOINT=...
AZURE_OPENAI_VERSION=...

# Кастомный адрес для OpenAI API
OPENAI_BASE_URL=https://ваш-кастомный-api/v1

# Хост Ollama
OLLAMA_HOST=http://localhost:11434

# Каталог конфигурации
DEEPWIKI_CONFIG_DIR=/путь/к/конфигурации
```

### Конфигурационные файлы

DeepWiki использует JSON-файлы для настройки:

1. **`generator.json`** — конфигурация генерации текста и моделей
2. **`embedder.json`** — настройки эмбеддингов и ретривера
3. **`repo.json`** — правила обработки репозиториев

По умолчанию хранятся в `api/config/`, путь можно изменить через `DEEPWIKI_CONFIG_DIR`.

### Кастомизация для сервис-провайдеров

Функция выбора модели позволяет:

- Предоставлять выбор моделей пользователям вашей системы
- Легко адаптироваться к новым LLM без изменения кода
- Поддерживать кастомные или специализированные модели

Пользователи могут выбрать модель через интерфейс или указать свой идентификатор.

### Настройка OpenAI base_url для корпоративных клиентов

Позволяет:

- Использовать приватные API OpenAI
- Подключаться к self-hosted решениям
- Интегрироваться с совместимыми сторонними сервисами

**Скоро**: DeepWiki получит режим, в котором пользователи будут указывать свои API-ключи напрямую в запросах — удобно для корпоративных решений.

## 🧩 Использование совместимых с OpenAI моделей (например, Alibaba Qwen)

Чтобы использовать модели эмбеддингов, совместимые с OpenAI:

1. Замените содержимое `api/config/embedder.json` содержимым `api/config/embedder.openai_compatible.json.bak`
2. В `.env` добавьте:
```bash
OPENAI_API_KEY=ваш_ключ
OPENAI_BASE_URL=совместимый_endpoint
```

Программа автоматически подставит значения из переменных окружения.

### Логирование

DeepWiki использует стандартный `logging` из Python. Настраивается через:

| Переменная        | Описание                                      | Значение по умолчанию         |
|------------------|-----------------------------------------------|-------------------------------|
| `LOG_LEVEL`      | Уровень логов (DEBUG, INFO, WARNING и т.д.)   | INFO                          |
| `LOG_FILE_PATH`  | Путь к файлу логов                             | `api/logs/application.log`    |

Пример:
```bash
export LOG_LEVEL=DEBUG
export LOG_FILE_PATH=./debug.log
python -m api.main
```

Или через Docker Compose:
```bash
LOG_LEVEL=DEBUG LOG_FILE_PATH=./debug.log docker-compose up
```

Для постоянства логов при перезапуске контейнера `api/logs` монтируется в `./api/logs`.

Также можно указать переменные в `.env`:

```bash
LOG_LEVEL=DEBUG
LOG_FILE_PATH=./debug.log
```

И просто выполнить:

```bash
docker-compose up
```

**Безопасность логов:** в продакшене важно настроить права доступа к `api/logs`, чтобы исключить несанкционированный доступ или запись.

## 🛠️ Расширенная настройка

### Переменные окружения

| Переменная               | Назначение                                                             | Обязательно | Примечание                                                                                     |
|--------------------------|------------------------------------------------------------------------|-------------|-----------------------------------------------------------------------------------------------|
| `GOOGLE_API_KEY`         | Ключ API для Google Gemini                                             | Нет         | Только если используете модели от Google                                                      |
| `OPENAI_API_KEY`         | Ключ API для OpenAI (нужен даже для эмбеддингов)                       | Да          | Обязателен для генерации эмбеддингов                                                          |
| `OPENROUTER_API_KEY`     | Ключ API для OpenRouter                                                | Нет         | Только если используете модели OpenRouter                                                     |
| `AZURE_OPENAI_API_KEY`   | Ключ Azure OpenAI                                                      | Нет         | Только если используете Azure                                                                 |
| `AZURE_OPENAI_ENDPOINT`  | Endpoint Azure                                                         | Нет         | Только если используете Azure                                                                 |
| `AZURE_OPENAI_VERSION`   | Версия API Azure                                                       | Нет         | Только если используете Azure                                                                 |
| `OLLAMA_HOST`            | Хост Ollama (по умолчанию http://localhost:11434)                      | Нет         | Указывается при использовании внешнего сервера Ollama                                         |
| `PORT`                   | Порт API-сервера (по умолчанию 8001)                                   | Нет         | Меняйте, если frontend и backend работают на одной машине                                     |
| `SERVER_BASE_URL`        | Базовый URL для API (по умолчанию http://localhost:8001)               | Нет         |                                                                                               |
| `DEEPWIKI_AUTH_MODE`     | Включает режим авторизации (true или 1)                                | Нет         | Если включён, потребуется код из `DEEPWIKI_AUTH_CODE`                                         |
| `DEEPWIKI_AUTH_CODE`     | Секретный код для запуска генерации                                    | Нет         | Только если включён режим авторизации                                                         |

Если не используете Ollama, обязательно настройте OpenAI API ключ.

## Режим авторизации

DeepWiki может быть запущен в режиме авторизации — для генерации вики потребуется ввести секретный код. Это полезно, если вы хотите ограничить доступ к функциональности.

Для включения:

- `DEEPWIKI_AUTH_MODE=true`
- `DEEPWIKI_AUTH_CODE=секретный_код`

Это ограничивает доступ с фронтенда и защищает кэш, но не блокирует прямые вызовы API.

### Запуск через Docker

Вы можете использовать Docker:

#### Запуск контейнера

```bash
docker pull ghcr.io/asyncfuncai/deepwiki-open:latest

docker run -p 8001:8001 -p 3000:3000 \
  -e GOOGLE_API_KEY=... \
  -e OPENAI_API_KEY=... \
  -e OPENROUTER_API_KEY=... \
  -e OLLAMA_HOST=... \
  -e AZURE_OPENAI_API_KEY=... \
  -e AZURE_OPENAI_ENDPOINT=... \
  -e AZURE_OPENAI_VERSION=... \
  -v ~/.adalflow:/root/.adalflow \
  ghcr.io/asyncfuncai/deepwiki-open:latest
```

Каталог `~/.adalflow` содержит:

- Клонированные репозитории
- Эмбеддинги и индексы
- Сгенерированные кэшированные вики

#### Docker Compose

```bash
# Убедитесь, что .env заполнен
docker-compose up
```

#### Использование .env

```bash
echo "GOOGLE_API_KEY=..." > .env
...
docker run -p 8001:8001 -p 3000:3000 \
  -v $(pwd)/.env:/app/.env \
  -v ~/.adalflow:/root/.adalflow \
  ghcr.io/asyncfuncai/deepwiki-open:latest
```

#### Локальная сборка Docker-образа

```bash
git clone https://github.com/AsyncFuncAI/deepwiki-open.git
cd deepwiki-open

docker build -t deepwiki-open .

docker run -p 8001:8001 -p 3000:3000 \
  -e GOOGLE_API_KEY=... \
  -e OPENAI_API_KEY=... \
  ... \
  deepwiki-open
```

#### Самоподписанные сертификаты

1. Создайте каталог `certs` (или свой)
2. Поместите сертификаты `.crt` или `.pem`
3. Соберите образ:

```bash
docker build --build-arg CUSTOM_CERT_DIR=certs .
```

### Описание API

Сервер API:

- Клонирует и индексирует репозитории
- Реализует RAG
- Поддерживает потоковую генерацию

См. подробности в [API README](./api/README.md)

## 🔌 Интеграция с OpenRouter

Платформа [OpenRouter](https://openrouter.ai/) предоставляет доступ ко множеству моделей:

- **Много моделей**: OpenAI, Anthropic, Google, Meta и др.
- **Простая настройка**: достаточно API-ключа
- **Гибкость и экономия**: выбирайте модели по цене и производительности
- **Быстрое переключение**: без изменения кода

### Как использовать

1. Получите ключ на [OpenRouter](https://openrouter.ai/)
2. Добавьте `OPENROUTER_API_KEY=...` в `.env`
3. Активируйте в интерфейсе
4. Выберите модель (например GPT-4o, Claude 3.5, Gemini 2.0 и др.)

Подходит для:

- Тестирования разных моделей без регистрации в каждом сервисе
- Доступа к моделям в регионах с ограничениями
- Сравнения производительности
- Оптимизации затрат

## 🤖 Возможности Ask и DeepResearch

### Ask

- **Ответы по коду**: AI использует содержимое репозитория
- **RAG**: подбираются релевантные фрагменты
- **Потоковая генерация**: ответы формируются в реальном времени
- **История общения**: поддерживается контекст

### DeepResearch

Функция глубокого анализа:

- **Многошаговый подход**: AI сам исследует тему
- **Этапы исследования**:
  1. План
  2. Промежуточные результаты
  3. Итоговый вывод

Активируется переключателем "Deep Research".

## 📱 Скриншоты

![Интерфейс](screenshots/Interface.png)  
*Основной интерфейс DeepWiki*

![Приватный доступ](screenshots/privaterepo.png)  
*Доступ к приватным репозиториям*

![DeepResearch](screenshots/DeepResearch.png)  
*DeepResearch анализирует сложные темы*

### Видео-демо

[![Видео](https://img.youtube.com/vi/zGANs8US8B4/0.jpg)](https://youtu.be/zGANs8US8B4)

## ❓ Решение проблем

### Проблемы с API-ключами

- **“Отсутствуют переменные окружения”** — проверьте `.env`
- **“Неверный ключ”** — уберите пробелы
- **“Ошибка OpenRouter API”** — проверьте ключ и баланс
- **“Ошибка Azure API”** — проверьте ключ, endpoint и версию

### Проблемы с подключением

- **“Нет подключения к API”** — убедитесь, что сервер запущен на 8001
- **“CORS ошибка”** — пробуйте запускать frontend и backend на одной машине

### Ошибки генерации

- **“Ошибка генерации вики”** — попробуйте меньший репозиторий
- **“Неверный формат ссылки”** — используйте корректные ссылки
- **“Нет структуры репозитория”** — проверьте токен доступа
- **“Ошибка диаграмм”** — система попытается автоматически исправить

### Универсальные советы

1. Перезапустите frontend и backend
2. Проверьте консоль браузера
3. Проверьте логи API

## 🤝 Участие

Вы можете:

- Заводить issues
- Отправлять pull requests
- Делиться идеями

## 📄 Лицензия

Проект распространяется под лицензией MIT. См. файл [LICENSE](LICENSE)

## ⭐ История звёзд

[![График звёзд](https://api.star-history.com/svg?repos=AsyncFuncAI/deepwiki-open&type=Date)](https://star-history.com/#AsyncFuncAI/deepwiki-open&Date)


================================================
FILE: README.vi.md
================================================
# DeepWiki-Open

![DeepWiki Banner](screenshots/Deepwiki.png)

**Open DeepWiki** là 1 triển khai thay thế cho DeepWiki, tự động tạo ra các trang wiki cho bất kỳ Repository nào trên GitHub, GitLab hoặc BitBucket! Chỉ cần nhập đường dẫn Repository, và DeepWiki sẽ:

1. Phân tích cấu trúc mã nguồn
2. Tạo tài liệu đầy đủ và chi tiết
3. Tạo sơ đồ trực quan để giải thích cách mọi thứ hoạt động
4. Sắp xếp tất cả documents thành một wiki dễ hiểu

[!["Buy Me A Coffee"](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)](https://buymeacoffee.com/sheing)

[![Twitter/X](https://img.shields.io/badge/Twitter-1DA1F2?style=for-the-badge&logo=twitter&logoColor=white)](https://x.com/sashimikun_void)
[![Discord](https://img.shields.io/badge/Discord-7289DA?style=for-the-badge&logo=discord&logoColor=white)](https://discord.com/invite/VQMBGR8u5v)

[English](./README.md) | [简体中文](./README.zh.md) | [繁體中文](./README.zh-tw.md) | [日本語](./README.ja.md) | [Español](./README.es.md) | [한국어](./README.kr.md) | [Tiếng Việt](./README.vi.md) | [Português Brasileiro](./README.pt-br.md) | [Français](./README.fr.md) | [Русский](./README.ru.md)

## ✨ Tính năng

- **Tạo Tài liệu tức thì**: Biến bất kỳ Repository GitHub, GitLab hoặc BitBucket nào thành wiki chỉ trong vài giây
- **Hỗ trợ Private Repository**: Truy cập Private Repository một cách an toàn với personal access tokens
- **Phân tích thông minh**: Hiểu cấu trúc và mối quan hệ của source codes nhờ AI
- **Tự động tạo Sơ đồ**: Tự động tạo sơ đồ Mermaid để trực quan hóa kiến trúc và luồng dữ liệu
- **Dễ dàng thao tác**: Giao diện wiki đơn giản, trực quan để khám phá
- **Trò chuyện với repository**: Trò chuyện với repo của bạn bằng AI (tích hợp RAG) để nhận câu trả lời chính xác
- **DeepResearch**: Quy trình Deep Research nhiều bước giúp phân tích kỹ lưỡng các chủ đề phức tạp
- **Hỗ trợ nhiều mô hình**: Hỗ trợ Google Gemini, OpenAI, OpenRouter, và local Ollama models

## 🚀 Bắt đầu (Siêu dễ :))

### Option 1: Sử dụng Docker

```bash
# Clone repository
git clone https://github.com/AsyncFuncAI/deepwiki-open.git
cd deepwiki-open

# Tạo .env file với API keys
echo "GOOGLE_API_KEY=your_google_api_key" > .env
echo "OPENAI_API_KEY=your_openai_api_key" >> .env
# Optional: Thêm OpenRouter API key nếu bạn muốn OpenRouter models
echo "OPENROUTER_API_KEY=your_openrouter_api_key" >> .env

# Run với Docker Compose
docker-compose up
```

> 💡 **Hướng dẫn lấy Keys**
> - Lấy Google API key từ [Google AI Studio](https://makersuite.google.com/app/apikey)
> - Lấy OpenAI API key từ [OpenAI Platform](https://platform.openai.com/api-keys)

### Option 2: Setup thủ công (Khuyên dùng)

#### Bước 1: Set Up API Keys

Tạo file `.env` trong thư mục gốc của project với những keys vừa tạo:

```
GOOGLE_API_KEY=your_google_api_key
OPENAI_API_KEY=your_openai_api_key
# Optional: Thêm OpenRouter API key nếu bạn muốn OpenRouter models
OPENROUTER_API_KEY=your_openrouter_api_key
```

#### Bước 2: Bắt đầu với Backend

```bash
# Cài đặt Python dependencies
python -m pip install poetry==2.0.1 && poetry install -C api

# Chạy API server
python -m api.main
```

#### Bước 3: Bắt đầu với Frontend

```bash
# Cài đặt JavaScript dependencies
npm install
# Hoặc
yarn install

# Chạy web app
npm run dev
# Hoặc
yarn dev
```

#### Bước 4: Dùng DeepWiki!

1. Mở [http://localhost:3000](http://localhost:3000) trên trình duyệt
2. Nhập đường dẫn GitHub, GitLab, hoặc Bitbucket repository (ví dụ như `https://github.com/openai/codex`, `https://github.com/microsoft/autogen`, `https://gitlab.com/gitlab-org/gitlab`, hay `https://bitbucket.org/redradish/atlassian_app_versions`)
3. Với private repositories, nhấn "+ Add access tokens" và nhập GitHub hoặc GitLab personal access token của bạn
4. Click "Generate Wiki" và xem kết quả!

## 🔍 Cách Open Deepwiki hoạt động

DeepWiki dùng AI để:

1. Clone và phân tích GitHub, GitLab, hoặc Bitbucket repository (bao gồm private repos với token authentication)
2. Tạo embeddings cho code (hỗ trợ RAG)
3. Tạo documentation với context-aware AI (dùng Google Gemini, OpenAI, OpenRouter, hay local Ollama models)
4. Tạo diagrams để giải thích code relationships
5. Organize thông tin thành 1 trang wiki
6. Cho phép Q&A với repository
7. Cung cấp khả năng DeepResearch

```mermaid
graph TD
    A[User inputs GitHub/GitLab/Bitbucket repo] --> AA{Private repo?}
    AA -->|Yes| AB[Add access token]
    AA -->|No| B[Clone Repository]
    AB --> B
    B --> C[Analyze Code Structure]
    C --> D[Create Code Embeddings]

    D --> M{Select Model Provider}
    M -->|Google Gemini| E1[Generate with Gemini]
    M -->|OpenAI| E2[Generate with OpenAI]
    M -->|OpenRouter| E3[Generate with OpenRouter]
    M -->|Local Ollama| E4[Generate with Ollama]

    E1 --> E[Generate Documentation]
    E2 --> E
    E3 --> E
    E4 --> E

    D --> F[Create Visual Diagrams]
    E --> G[Organize as Wiki]
    F --> G
    G --> H[Interactive DeepWiki]

    classDef process stroke-width:2px;
    classDef data stroke-width:2px;
    classDef result stroke-width:2px;
    classDef decision stroke-width:2px;

    class A,D data;
    class AA,M decision;
    class B,C,E,F,G,AB,E1,E2,E3,E4 process;
    class H result;
```

## 🛠️ Cấu trúc dự án

```
deepwiki/
├── api/                  # Backend API server
│   ├── main.py           # API
│   ├── api.py            # FastAPI
│   ├── rag.py            # Retrieval Augmented Generation (RAG)
│   ├── data_pipeline.py  # Data processing utilities
│   └── pyproject.toml    # Python dependencies
│
├── src/                  # Frontend Next.js app
│   ├── app/              # Next.js app directory
│   │   └── page.tsx      # Main application page
│   └── components/       # React components
│       └── Mermaid.tsx   # Mermaid diagram renderer
│
├── public/               # Static assets
├── package.json          # JavaScript dependencies
└── .env                  # Environment variables (create this)
```

## 🛠️ Cài đặt nâng cao

### Biến môi trường

| Biến môi trường | Mô tả | Bắt buộc | Ghi chú |
|----------|-------------|----------|------|
| `GOOGLE_API_KEY` | Google Gemini API key | Có | |
| `OPENAI_API_KEY` | OpenAI API key | Có | |
| `OPENROUTER_API_KEY` | OpenRouter API key | Không | Yêu cầu nếu bạn muốn dùng OpenRouter models |
| `PORT` | Port của API server (mặc định: 8001) | Không | Nếu bạn chạy API và frontend trên cùng một máy, hãy điều chỉnh port trong `SERVER_BASE_URL` cho phù hợp |
| `SERVER_BASE_URL` | Đường dẫn mặc định của API server (mặc định: http://localhost:8001) | Không | |

### Cài Đặt với Docker

Bạn có thể dùng Docker để run DeepWiki:

```bash
# Pull Docker image từ GitHub Container Registry
docker pull ghcr.io/asyncfuncai/deepwiki-open:latest

# Chạy container với biến môi trường
docker run -p 8001:8001 -p 3000:3000 \
  -e GOOGLE_API_KEY=your_google_api_key \
  -e OPENAI_API_KEY=your_openai_api_key \
  -e OPENROUTER_API_KEY=your_openrouter_api_key \
  -v ~/.adalflow:/root/.adalflow \
  ghcr.io/asyncfuncai/deepwiki-open:latest
```

Hoặc đơn giản hơn, sử dụng `docker-compose.yml`:

```bash
# Edit the .env file with your API keys first
docker-compose up
```

#### Sử dụng .env file với Docker

Bạn có thể "mount" .env file vào container:

```bash
# Tạo .env file với your API keys
echo "GOOGLE_API_KEY=your_google_api_key" > .env
echo "OPENAI_API_KEY=your_openai_api_key" >> .env
echo "OPENROUTER_API_KEY=your_openrouter_api_key" >> .env

# Run container với .env file
docker run -p 8001:8001 -p 3000:3000 \
  -v $(pwd)/.env:/app/.env \
  -v ~/.adalflow:/root/.adalflow \
  ghcr.io/asyncfuncai/deepwiki-open:latest
```

#### Build Docker image trên máy cục bộ


```bash
# Clone repository
git clone https://github.com/AsyncFuncAI/deepwiki-open.git
cd deepwiki-open

# Build Docker image
docker build -t deepwiki-open .

# Chạy container
docker run -p 8001:8001 -p 3000:3000 \
  -e GOOGLE_API_KEY=your_google_api_key \
  -e OPENAI_API_KEY=your_openai_api_key \
  -e OPENROUTER_API_KEY=your_openrouter_api_key \
  deepwiki-open
```

### Chi tiết API Server

API server cung cấp:
- Repository cloning và indexing
- RAG (Retrieval Augmented Generation)
- Trò chuyện liên tục

Để biết thêm chi tiết, xem [API README](./api/README.md).

## 🤖 Hệ thống lựa chọn mô hình dựa trên nhà cung cấp

DeepWiki hiện đã triển khai một hệ thống lựa chọn mô hình linh hoạt dựa trên nhiều nhà cung cấp LLM:

### Các nhà cung cấp và mô hình được hỗ trợ

- **Google**: Mặc định là `gemini-2.5-flash`, cũng hỗ trợ `gemini-2.5-flash-lite`, `gemini-2.5-pro`, v.v.
- **OpenAI**: Mặc định là `gpt-5-nano`, cũng hỗ trợ `gpt-5`, `4o`, v.v.
- **OpenRouter**: Truy cập nhiều mô hình qua một API thống nhất, bao gồm Claude, Llama, Mistral, v.v.
- **Ollama**: Hỗ trợ các mô hình mã nguồn mở chạy cục bộ như `llama3`

### Biến môi trường

Mỗi nhà cung cấp yêu cầu các biến môi trường API key tương ứng:

```
# API Keys
GOOGLE_API_KEY=google_api_key_của_bạn        # Bắt buộc cho các mô hình Google Gemini
OPENAI_API_KEY=openai_key_của_bạn            # Bắt buộc cho các mô hình OpenAI
OPENROUTER_API_KEY=openrouter_key_của_bạn    # Bắt buộc cho các mô hình OpenRouter

# Cấu hình URL cơ sở cho OpenAI API
OPENAI_BASE_URL=https://endpoint-tùy-chỉnh.com/v1  # Tùy chọn, cho các điểm cuối API OpenAI tùy chỉnh

# Thư mục cấu hình
DEEPWIKI_CONFIG_DIR=/đường/dẫn/đến/thư_mục/cấu_hình  # Tùy chọn, cho vị trí tệp cấu hình tùy chỉnh
```

### Tệp cấu hình

DeepWiki sử dụng các tệp cấu hình JSON để quản lý các khía cạnh khác nhau của hệ thống:

1. **`generator.json`**: Cấu hình cho các mô hình tạo văn bản
   - Xác định các nhà cung cấp mô hình có sẵn (Google, OpenAI, OpenRouter, Ollama)
   - Chỉ định các mô hình mặc định và có sẵn cho mỗi nhà cung cấp
   - Chứa các tham số đặc thù cho mô hình như temperature và top_p

2. **`embedder.json`**: Cấu hình cho mô hình embedding và xử lý văn bản
   - Xác định mô hình embedding cho lưu trữ vector
   - Chứa cấu hình bộ truy xuất cho RAG
   - Chỉ định cài đặt trình chia văn bản để phân đoạn tài liệu

3. **`repo.json`**: Cấu hình xử lý repository
   - Chứa bộ lọc tệp để loại trừ một số tệp và thư mục nhất định
   - Xác định giới hạn kích thước repository và quy tắc xử lý

Mặc định, các tệp này nằm trong thư mục `api/config/`. Bạn có thể tùy chỉnh vị trí của chúng bằng biến môi trường `DEEPWIKI_CONFIG_DIR`.

### Lựa chọn mô hình tùy chỉnh cho nhà cung cấp dịch vụ

Tính năng lựa chọn mô hình tùy chỉnh được thiết kế đặc biệt cho các nhà cung cấp dịch vụ cần:

- Bạn có thể cung cấp cho người dùng trong tổ chức của mình nhiều lựa chọn mô hình AI khác nhau
- Bạn có thể thích ứng nhanh chóng với môi trường LLM đang phát triển nhanh chóng mà không cần thay đổi mã
- Bạn có thể hỗ trợ các mô hình chuyên biệt hoặc được tinh chỉnh không có trong danh sách định nghĩa trước

Bạn có thể triển khai các mô hình cung cấp bằng cách chọn từ các tùy chọn định nghĩa trước hoặc nhập định danh mô hình tùy chỉnh trong giao diện người dùng.

### Cấu hình URL cơ sở cho các kênh riêng doanh nghiệp

Cấu hình base_url của OpenAI Client được thiết kế chủ yếu cho người dùng doanh nghiệp có các kênh API riêng. Tính năng này:

- Cho phép kết nối với các điểm cuối API riêng hoặc dành riêng cho doanh nghiệp
- Cho phép các tổ chức sử dụng dịch vụ LLM tự lưu trữ hoặc triển khai tùy chỉnh
- Hỗ trợ tích hợp với các dịch vụ tương thích API OpenAI của bên thứ ba

**Sắp ra mắt**: Trong các bản cập nhật tương lai, DeepWiki sẽ hỗ trợ chế độ mà người dùng cần cung cấp API key của riêng họ trong các yêu cầu. Điều này sẽ cho phép khách hàng doanh nghiệp có kênh riêng sử dụng cấu hình API hiện có mà không cần chia sẻ thông tin đăng nhập với triển khai DeepWiki.

## 🔌 Tích hợp OpenRouter

DeepWiki hiện đã hỗ trợ [OpenRouter](https://openrouter.ai/) làm nhà cung cấp mô hình, cho phép bạn truy cập hàng trăm mô hình AI thông qua một API duy nhất:

- **Nhiều tùy chọn mô hình**: Truy cập các mô hình từ OpenAI, Anthropic, Google, Meta, Mistral và nhiều nhà cung cấp khác
- **Cấu hình đơn giản**: Chỉ cần thêm khóa API của bạn từ OpenRouter và chọn mô hình bạn muốn sử dụng
- **Tiết kiệm chi phí**: Lựa chọn mô hình phù hợp với ngân sách và nhu cầu hiệu suất của bạn
- **Chuyển đổi dễ dàng**: Chuyển đổi giữa các mô hình khác nhau mà không cần thay đổi mã nguồn


### Cách sử dụng OpenRouter với DeepWiki

1. **Lấy API Key**: Đăng ký tại [OpenRouter](https://openrouter.ai/) và lấy khóa API
2. **Thêm vào biến môi trường**: Thêm `OPENROUTER_API_KEY=your_key` vào file `.env`
3. **Bật trong giao diện**: Chọn "Use OpenRouter API" trên trang chủ
4. **Chọn mô hình**: Lựa chọn từ các mô hình phổ biến như GPT-4o, Claude 3.5 Sonnet, Gemini 2.0 và nhiều hơn nữa


OpenRouter đặc biệt hữu ích nếu bạn muốn:

- Thử nhiều mô hình khác nhau mà không cần đăng ký nhiều dịch vụ
- Truy cập các mô hình có thể bị giới hạn tại khu vực của bạn
- So sánh hiệu năng giữa các nhà cung cấp mô hình khác nhau
- Tối ưu hóa chi phí so với hiệu suất dựa trên nhu cầu của bạn


## 🤖 Tính năng Hỏi & Nghiên cứu Sâu (DeepResearch)

### Tính năng Hỏi (Ask)

Tính năng Hỏi cho phép bạn trò chuyện với kho mã của mình bằng cách sử dụng kỹ thuật RAG (Retrieval Augmented Generation):

- **Phản hồi theo ngữ cảnh**: Nhận câu trả lời chính xác dựa trên mã thực tế trong kho của bạn
- **Ứng dụng RAG**: Hệ thống truy xuất các đoạn mã liên quan để tạo ra câu trả lời có cơ sở
- **Phản hồi theo thời gian thực**: Xem câu trả lời được tạo ra trực tiếp, mang lại trải nghiệm tương tác hơn
- **Lưu lịch sử cuộc trò chuyện**: Hệ thống duy trì ngữ cảnh giữa các câu hỏi để cuộc đối thoại liền mạch hơn


### Tính năng DeepResearch

DeepResearch nâng tầm phân tích kho mã với quy trình nghiên cứu nhiều vòng:

- **Nghiên cứu chuyên sâu**: Khám phá kỹ lưỡng các chủ đề phức tạp thông qua nhiều vòng nghiên cứu
- **Quy trình có cấu trúc**: Tuân theo kế hoạch nghiên cứu rõ ràng với các bản cập nhật và kết luận tổng thể
- **Tự động tiếp tục**: AI sẽ tự động tiếp tục quá trình nghiên cứu cho đến khi đưa ra kết luận (tối đa 5 vòng)
- **Các giai đoạn nghiên cứu**:
  1. **Kế hoạch nghiên cứu**: Phác thảo phương pháp và những phát hiện ban đầu
  2. **Cập nhật nghiên cứu**: Bổ sung kiến thức mới qua từng vòng lặp
  3. **Kết luận cuối cùng**: Đưa ra câu trả lời toàn diện dựa trên tất cả các vòng nghiên cứu

Để sử dụng DeepResearch, chỉ cần bật công tắc "Deep Research" trong giao diện Hỏi (Ask) trước khi gửi câu hỏi của bạn.


## 📱 Ảnh chụp màn hình

![Giao diện chính của DeepWiki](screenshots/Interface.png)
*Giao diện chính của DeepWiki*

![Hỗ trợ kho riêng tư](screenshots/privaterepo.png)
*Truy cập kho riêng tư bằng Personal Access Token*

![Tính năng DeepResearch](screenshots/DeepResearch.png)
*DeepResearch thực hiện nghiên cứu nhiều vòng cho các chủ đề phức tạp*

### Demo Video

[![DeepWiki Demo Video](https://img.youtube.com/vi/zGANs8US8B4/0.jpg)](https://youtu.be/zGANs8US8B4)


## ❓ Khắc phục sự cố

### Vấn đề với API Key
- **"Thiếu biến môi trường"**: Đảm bảo rằng file `.env` của bạn nằm ở thư mục gốc của dự án và chứa các API key cần thiết
- **"API key không hợp lệ"**: Kiểm tra lại xem bạn đã sao chép đầy đủ API key mà không có khoảng trắng thừa chưa
- **"Lỗi API OpenRouter"**: Xác minh rằng API key của OpenRouter là hợp lệ và có đủ tín dụng

### Vấn đề kết nối
- **"Không thể kết nối với máy chủ API"**: Đảm bảo máy chủ API đang chạy trên cổng 8001
- **"Lỗi CORS"**: API được cấu hình để cho phép tất cả các nguồn gốc, nhưng nếu gặp sự cố, thử chạy cả frontend và backend trên cùng một máy tính

### Vấn đề khi tạo nội dung
- **"Lỗi khi tạo wiki"**: Với các kho mã rất lớn, hãy thử trước với kho mã nhỏ hơn
- **"Định dạng kho mã không hợp lệ"**: Đảm bảo bạn đang sử dụng định dạng URL hợp lệ cho GitHub, GitLab hoặc Bitbucket
- **"Không thể lấy cấu trúc kho mã"**: Với các kho mã riêng tư, hãy đảm bảo bạn đã nhập token truy cập cá nhân hợp lệ và có quyền truy cập phù hợp
- **"Lỗi khi render sơ đồ"**: Ứng dụng sẽ tự động thử khắc phục các sơ đồ bị lỗi

### Các giải pháp phổ biến
1. **Khởi động lại cả hai máy chủ**: Đôi khi, một lần khởi động lại đơn giản có thể giải quyết hầu hết các vấn đề
2. **Kiểm tra nhật ký trình duyệt**: Mở công cụ phát triển của trình duyệt để xem các lỗi JavaScript
3. **Kiểm tra nhật ký API**: Xem các lỗi Python trong terminal nơi API đang chạy


## 🤝 Đóng góp

Chúng tôi hoan nghênh mọi đóng góp! Bạn có thể:
- Mở các vấn đề (issues) để báo lỗi hoặc yêu cầu tính năng
- Gửi pull request để cải thiện mã nguồn
- Chia sẻ phản hồi và ý tưởng của bạn

## 📄 Giấy phép

Dự án này được cấp phép theo Giấy phép MIT - xem file [LICENSE](LICENSE) để biết chi tiết.

## ⭐ Lịch sử

[![Biểu đồ lịch sử](https://api.star-history.com/svg?repos=AsyncFuncAI/deepwiki-open&type=Date)](https://star-history.com/#AsyncFuncAI/deepwiki-open&Date)


================================================
FILE: README.zh-tw.md
================================================
# DeepWiki-Open

![DeepWiki 橫幅](screenshots/Deepwiki.png)

**DeepWiki** 可以為任何 GitHub、GitLab 或 BitBucket 程式碼儲存庫自動建立美觀、互動式的 Wiki！只需輸入儲存庫名稱，DeepWiki 將：

1. 分析程式碼結構
2. 產生全面的文件
3. 建立視覺化圖表解釋一切如何運作
4. 將所有內容整理成易於導覽的 Wiki

[!["Buy Me A Coffee"](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)](https://buymeacoffee.com/sheing)
[![Tip in Crypto](https://tip.md/badge.svg)](https://tip.md/sng-asyncfunc)
[![Twitter/X](https://img.shields.io/badge/Twitter-1DA1F2?style=for-the-badge&logo=twitter&logoColor=white)](https://x.com/sashimikun_void)
[![Discord](https://img.shields.io/badge/Discord-7289DA?style=for-the-badge&logo=discord&logoColor=white)](https://discord.com/invite/VQMBGR8u5v)

[English](./README.md) | [简体中文](./README.zh.md) | [繁體中文](./README.zh-tw.md) | [日本語](./README.ja.md) | [Español](./README.es.md) | [한국어](./README.kr.md) | [Tiếng Việt](./README.vi.md) | [Português Brasileiro](./README.pt-br.md) | [Français](./README.fr.md) | [Русский](./README.ru.md)

## ✨ 特點

- **即時文件**：幾秒鐘內將任何 GitHub、GitLab 或 BitBucket 儲存庫轉換為 Wiki
- **私人儲存庫支援**：使用個人存取權杖安全存取私人儲存庫
- **智慧分析**：AI 驅動的程式碼結構和關係理解
- **精美圖表**：自動產生 Mermaid 圖表視覺化架構和資料流
- **簡易導覽**：簡單、直觀的介面探索 Wiki
- **提問功能**：使用 RAG 驅動的 AI 與您的儲存庫聊天，取得準確答案
- **深度研究**：多輪研究過程，徹底調查複雜主題
- **多模型提供商**：支援 Google Gemini、OpenAI、OpenRouter 和本機 Ollama 模型

## 🚀 快速開始（超級簡單！）

### 選項 1：使用 Docker

```bash
# 複製儲存庫
git clone https://github.com/AsyncFuncAI/deepwiki-open.git
cd deepwiki-open

# 建立包含 API 金鑰的 .env 檔案
echo "GOOGLE_API_KEY=your_google_api_key" > .env
echo "OPENAI_API_KEY=your_openai_api_key" >> .env
# 可選：如果您想使用 OpenRouter 模型，新增 OpenRouter API 金鑰
echo "OPENROUTER_API_KEY=your_openrouter_api_key" >> .env
# 可選：如果 Ollama 不在本機執行，新增 Ollama 主機位址，預設為 http://localhost:11434
echo "OLLAMA_HOST=your_ollama_host" >> .env

# 使用 Docker Compose 執行
docker-compose up
```

有關使用 DeepWiki 搭配 Ollama 和 Docker 的詳細說明，請參閱 [Ollama 操作說明](Ollama-instruction.md)。

(上述 Docker 命令以及 `docker-compose.yml` 設定會掛載您主機上的 `~/.adalflow` 目錄到容器內的 `/root/.adalflow`。此路徑用於儲存：
- 複製的儲存庫 (`~/.adalflow/repos/`)
- 儲存庫的嵌入和索引 (`~/.adalflow/databases/`)
- 快取的已產生 Wiki 內容 (`~/.adalflow/wikicache/`)

這確保了即使容器停止或移除，您的資料也能持久保存。)

> 💡 **取得這些金鑰的地方：**
> - 從 [Google AI Studio](https://makersuite.google.com/app/apikey) 取得 Google API 金鑰
> - 從 [OpenAI Platform](https://platform.openai.com/api-keys) 取得 OpenAI API 金鑰

### 選項 2：手動設定（推薦）

#### 步驟 1：設定 API 金鑰

在專案根目錄建立一個 `.env` 檔案，包含以下金鑰：

```
GOOGLE_API_KEY=your_google_api_key
OPENAI_API_KEY=your_openai_api_key
# 可選：如果您想使用 OpenRouter 模型，新增此項
OPENROUTER_API_KEY=your_openrouter_api_key
# 可選：如果 Ollama 不在本機執行，新增 Ollama 主機位址，預設為 http://localhost:11434
OLLAMA_HOST=your_ollama_host
```

#### 步驟 2：啟動後端

```bash
# 安裝 Python 相依性
python -m pip install poetry==2.0.1 && poetry install -C api

# 啟動 API 伺服器
python -m api.main
```

#### 步驟 3：啟動前端

```bash
# 安裝 JavaScript 相依性
npm install
# 或
yarn install

# 啟動 Web 應用
npm run dev
# 或
yarn dev
```

#### 步驟 4：使用 DeepWiki！

1. 在瀏覽器中開啟 [http://localhost:3000](http://localhost:3000)
2. 輸入 GitHub、GitLab 或 Bitbucket 儲存庫（如 `https://github.com/openai/codex`、`https://github.com/microsoft/autogen`、`https://gitlab.com/gitlab-org/gitlab` 或 `https://bitbucket.org/redradish/atlassian_app_versions`）
3. 對於私人儲存庫，點擊「+ 新增存取權杖」並輸入您的 GitHub 或 GitLab 個人存取權杖
4. 點擊「產生 Wiki」，見證奇蹟的發生！

## 🔍 工作原理

DeepWiki 使用 AI 來：

1. 複製並分析 GitHub、GitLab 或 Bitbucket 儲存庫（包括使用權杖驗證的私人儲存庫）
2. 建立程式碼嵌入用於智慧檢索
3. 使用上下文感知 AI 產生文件（使用 Google Gemini、OpenAI、OpenRouter 或本機 Ollama 模型）
4. 建立視覺化圖表解釋程式碼關係
5. 將所有內容組織成結構化 Wiki
6. 透過提問功能實現與儲存庫的智慧問答
7. 透過深度研究功能提供深入研究能力

```mermaid
graph TD
    A[使用者輸入 GitHub/GitLab/Bitbucket 儲存庫] --> AA{私人儲存庫?}
    AA -->|是| AB[新增存取權杖]
    AA -->|否| B[複製儲存庫]
    AB --> B
    B --> C[分析程式碼結構]
    C --> D[建立程式碼嵌入]

    D --> M{選擇模型提供商}
    M -->|Google Gemini| E1[使用 Gemini 產生]
    M -->|OpenAI| E2[使用 OpenAI 產生]
    M -->|OpenRouter| E3[使用 OpenRouter 產生]
    M -->|本機 Ollama| E4[使用 Ollama 產生]

    E1 --> E[產生文件]
    E2 --> E
    E3 --> E
    E4 --> E

    D --> F[建立視覺化圖表]
    E --> G[組織為 Wiki]
    F --> G
    G --> H[互動式 DeepWiki]

    classDef process stroke-width:2px;
    classDef data stroke-width:2px;
    classDef result stroke-width:2px;
    classDef decision stroke-width:2px;

    class A,D data;
    class AA,M decision;
    class B,C,E,F,G,AB,E1,E2,E3,E4 process;
    class H result;
```

## 🛠️ 專案結構

```
deepwiki/
├── api/                  # 後端 API 伺服器
│   ├── main.py           # API 進入點
│   ├── api.py            # FastAPI 實作
│   ├── rag.py            # 檢索增強產生
│   ├── data_pipeline.py  # 資料處理工具
│   └── pyproject.toml    # Python 相依性
│
├── src/                  # 前端 Next.js 應用
│   ├── app/              # Next.js 應用目錄
│   │   └── page.tsx      # 主應用頁面
│   └── components/       # React 元件
│       └── Mermaid.tsx   # Mermaid 圖表渲染器
│
├── public/               # 靜態資源
├── package.json          # JavaScript 相依性
└── .env                  # 環境變數（需要建立）
```

## 🤖 基於提供商的模型選擇系統

DeepWiki 現在實作了靈活的基於提供商的模型選擇系統，支援多種 LLM 提供商：

### 支援的提供商和模型

- **Google**：預設 `gemini-2.5-flash`，也支援 `gemini-2.5-flash-lite`、`gemini-2.5-pro` 等
- **OpenAI**：預設 `gpt-5-nano`，也支援 `gpt-5`、`4o` 等
- **OpenRouter**：透過統一 API 存取多種模型，包括 Claude、Llama、Mistral 等
- **Ollama**：支援本機執行的開源模型，如 `llama3`

### 環境變數

每個提供商都需要對應的 API 金鑰環境變數：

```
# API 金鑰
GOOGLE_API_KEY=your_google_api_key        # 使用 Google Gemini 模型時必需
OPENAI_API_KEY=your_openai_api_key        # 使用 OpenAI 模型時必需
OPENROUTER_API_KEY=your_openrouter_api_key # 使用 OpenRouter 模型時必需

# OpenAI API 基礎 URL 設定
OPENAI_BASE_URL=https://custom-api-endpoint.com/v1  # 可選，用於自訂 OpenAI API 端點

# Ollama 主機
OLLAMA_HOST=your_ollama_host # 可選，如果 Ollama 不在本機執行，預設為 http://localhost:11434

# 設定檔目錄
DEEPWIKI_CONFIG_DIR=/path/to/custom/config/dir  # 可選，用於自訂設定檔位置
```

### 設定檔

DeepWiki 使用 JSON 設定檔來管理系統的各個層面：

1. **`generator.json`**：文字產生模型設定
   - 定義可用的模型提供商（Google、OpenAI、OpenRouter、Ollama）
   - 指定每個提供商的預設和可用模型
   - 包含模型特定參數，如 temperature 和 top_p

2. **`embedder.json`**：嵌入模型和文字處理設定
   - 定義用於向量儲存的嵌入模型
   - 包含用於 RAG 的檢索器設定
   - 指定文件分塊的文字分割器設定

3. **`repo.json`**：儲存庫處理設定
   - 包含排除特定檔案和目錄的檔案篩選器
   - 定義儲存庫大小限制和處理規則

預設情況下，這些檔案位於 `api/config/` 目錄中。您可以使用 `DEEPWIKI_CONFIG_DIR` 環境變數自訂它們的位置。

### 為服務提供商設計的自訂模型選擇

自訂模型選擇功能專為需要以下功能的服務提供商設計：

- 您可以在組織內為使用者提供多種 AI 模型選擇
- 您可以快速適應快速發展的 LLM 領域，無需變更程式碼
- 您可以支援不在預定義清單中的專業或微調模型

服務提供商可以透過從預定義選項中選擇或在前端介面中輸入自訂模型識別符來實作其模型提供方案。

### 為企業私有通道設計的基礎 URL 設定

OpenAI 客戶端的 base_url 設定主要為擁有私有 API 通道的企業使用者設計。此功能：

- 支援連線到私有或企業特定的 API 端點
- 允許組織使用自己的自主託管或自訂部署的 LLM 服務
- 支援與第三方 OpenAI API 相容服務的整合

**即將推出**：在未來的更新中，DeepWiki 將支援一種模式，讓使用者需要在請求中提供自己的 API 金鑰。這將允許擁有私有通道的企業客戶使用其現有的 API 安排，而不必與 DeepWiki 部署共享憑證。

## 🧩 使用 OpenAI 相容的嵌入模型（如阿里巴巴 Qwen）

如果您想使用與 OpenAI API 相容的嵌入模型（如阿里巴巴 Qwen），請按照以下步驟操作：

1. 用 `api/config/embedder.openai_compatible.json.bak` 的內容替換 `api/config/embedder.json` 的內容。
2. 在專案根目錄的 `.env` 檔案中，設定相關的環境變數，例如：
   ```
   OPENAI_API_KEY=your_api_key
   OPENAI_BASE_URL=your_openai_compatible_endpoint
   ```
3. 程式會自動用環境變數的值替換 embedder.json 中的預留位置。

這讓您可以無縫切換到任何 OpenAI 相容的嵌入服務，無需變更程式碼。

### 日誌記錄

DeepWiki 使用 Python 的內建 `logging` 模組進行診斷輸出。您可以透過環境變數設定詳細程度和日誌檔案目標：

| 變數             | 說明                                                                 | 預設值                        |
|-----------------|----------------------------------------------------------------------|------------------------------|
| `LOG_LEVEL`     | 日誌記錄等級（DEBUG、INFO、WARNING、ERROR、CRITICAL）                    | INFO                         |
| `LOG_FILE_PATH` | 日誌檔案的路徑。如果設定，日誌將寫入此檔案   | `api/logs/application.log`   |

要啟用除錯日誌並將日誌導向自訂檔案：
```bash
export LOG_LEVEL=DEBUG
export LOG_FILE_PATH=./debug.log
python -m api.main
```
或使用 Docker Compose：
```bash
LOG_LEVEL=DEBUG LOG_FILE_PATH=./debug.log docker-compose up
```

使用 Docker Compose 執行時，容器的 `api/logs` 目錄會掛載到主機上的 `./api/logs`（請參閱 `docker-compose.yml` 中的 `volumes` 區段），確保日誌檔案在重新啟動後仍然存在。

您也可以將這些設定儲存在 `.env` 檔案中：

```bash
LOG_LEVEL=DEBUG
LOG_FILE_PATH=./debug.log
```
然後簡單執行：

```bash
docker-compose up
```

**日誌路徑安全性考量：** 在生產環境中，請確保 `api/logs` 目錄和任何自訂日誌檔案路徑都受到適當的檔案系統權限和存取控制保護。應用程式會強制要求 `LOG_FILE_PATH` 位於專案的 `api/logs` 目錄內，以防止路徑遍歷或未授權的寫入。

## 🛠️ 進階設定

### 環境變數

| 變數             | 說明                                                  | 必需 | 備註                                                                                                     |
|----------------------|--------------------------------------------------------------|----------|----------------------------------------------------------------------------------------------------------|
| `GOOGLE_API_KEY`     | Google Gemini API 金鑰，用於 AI 產生                      | 否 | 只有在您想使用 Google Gemini 模型時才需要                                                    |
| `OPENAI_API_KEY`     | OpenAI API 金鑰，用於嵌入                                | 是 | 備註：即使您不使用 OpenAI 模型，這個也是必需的，因為它用於嵌入              |
| `OPENROUTER_API_KEY` | OpenRouter API 金鑰，用於替代模型                    | 否 | 只有在您想使用 OpenRouter 模型時才需要                                                       |
| `OLLAMA_HOST`        | Ollama 主機（預設：http://localhost:11434）                | 否 | 只有在您想使用外部 Ollama 伺服器時才需要                                                  |
| `PORT`               | API 伺服器的連接埠（預設：8001）                      | 否 | 如果您在同一台機器上託管 API 和前端，請確保相應地變更 `SERVER_BASE_URL` 的連接埠 |
| `SERVER_BASE_URL`    | API 伺服器的基礎 URL（預設：http://localhost:8001） | 否 | |
| `DEEPWIKI_AUTH_MODE` | 設定為 `true` 或 `1` 以啟用授權模式 | 否 | 預設為 `false`。如果啟用，則需要 `DEEPWIKI_AUTH_CODE` |
| `DEEPWIKI_AUTH_CODE` | 當 `DEEPWIKI_AUTH_MODE` 啟用時，Wiki 產生所需的秘密代碼 | 否 | 只有在 `DEEPWIKI_AUTH_MODE` 為 `true` 或 `1` 時才使用 |

如果您不使用 ollama 模式，您需要設定 OpenAI API 金鑰用於嵌入。其他 API 金鑰只有在設定並使用對應提供商的模型時才需要。

## 授權模式

DeepWiki 可以設定為在授權模式下執行，在此模式下，Wiki 產生需要有效的授權代碼。如果您想控制誰可以使用產生功能，這會很有用。
此模式會限制前端啟動並保護快取刪除，但如果直接存取 API 端點，無法完全防止後端產生。

要啟用授權模式，請設定以下環境變數：

- `DEEPWIKI_AUTH_MODE`：將此設定為 `true` 或 `1`。啟用時，前端將顯示授權代碼的輸入欄位。
- `DEEPWIKI_AUTH_CODE`：將此設定為所需的秘密代碼。限制前端啟動並保護快取刪除，但如果直接存取 API 端點，無法完全防止後端產生。

如果未設定 `DEEPWIKI_AUTH_MODE` 或設定為 `false`（或除 `true`/`1` 以外的任何其他值），授權功能將被停用，不需要任何代碼。

### Docker 設定

您可以使用 Docker 來執行 DeepWiki：

```bash
# 從 GitHub Container Registry 拉取映像
docker pull ghcr.io/asyncfuncai/deepwiki-open:latest

# 使用環境變數執行容器
docker run -p 8001:8001 -p 3000:3000 \
  -e GOOGLE_API_KEY=your_google_api_key \
  -e OPENAI_API_KEY=your_openai_api_key \
  -e OPENROUTER_API_KEY=your_openrouter_api_key \
  -e OLLAMA_HOST=your_ollama_host \
  -v ~/.adalflow:/root/.adalflow \
  ghcr.io/asyncfuncai/deepwiki-open:latest
```

此命令也會將主機上的 `~/.adalflow` 掛載到容器中的 `/root/.adalflow`。此路徑用於儲存：
- 複製的儲存庫（`~/.adalflow/repos/`）
- 它們的嵌入和索引（`~/.adalflow/databases/`）
- 快取的已產生 Wiki 內容（`~/.adalflow/wikicache/`）

這確保即使容器停止或移除，您的資料也會持續存在。

或使用提供的 `docker-compose.yml` 檔案：

```bash
# 首先使用您的 API 金鑰編輯 .env 檔案
docker-compose up
```

（`docker-compose.yml` 檔案預先設定為掛載 `~/.adalflow` 以保持資料持續性，類似於上面的 `docker run` 命令。）

#### 在 Docker 中使用 .env 檔案

您也可以將 .env 檔案掛載到容器：

```bash
# 使用您的 API 金鑰建立 .env 檔案
echo "GOOGLE_API_KEY=your_google_api_key" > .env
echo "OPENAI_API_KEY=your_openai_api_key" >> .env
echo "OPENROUTER_API_KEY=your_openrouter_api_key" >> .env
echo "OLLAMA_HOST=your_ollama_host" >> .env

# 使用掛載的 .env 檔案執行容器
docker run -p 8001:8001 -p 3000:3000 \
  -v $(pwd)/.env:/app/.env \
  -v ~/.adalflow:/root/.adalflow \
  ghcr.io/asyncfuncai/deepwiki-open:latest
```

此命令也會將主機上的 `~/.adalflow` 掛載到容器中的 `/root/.adalflow`。此路徑用於儲存：
- 複製的儲存庫（`~/.adalflow/repos/`）
- 它們的嵌入和索引（`~/.adalflow/databases/`）
- 快取的已產生 Wiki 內容（`~/.adalflow/wikicache/`）

這確保即使容器停止或移除，您的資料也會持續存在。

#### 在本機建置 Docker 映像

如果您想在本機建置 Docker 映像：

```bash
# 複製儲存庫
git clone https://github.com/AsyncFuncAI/deepwiki-open.git
cd deepwiki-open

# 建置 Docker 映像
docker build -t deepwiki-open .

# 執行容器
docker run -p 8001:8001 -p 3000:3000 \
  -e GOOGLE_API_KEY=your_google_api_key \
  -e OPENAI_API_KEY=your_openai_api_key \
  -e OPENROUTER_API_KEY=your_openrouter_api_key \
  -e OLLAMA_HOST=your_ollama_host \
  deepwiki-open
```

### API 伺服器詳細資訊

API 伺服器提供：
- 儲存庫複製和索引
- RAG（檢索增強產生）
- 串流聊天完成

更多詳細資訊，請參閱 [API README](./api/README.md)。

## 🔌 OpenRouter 整合

DeepWiki 現在支援 [OpenRouter](https://openrouter.ai/) 作為模型提供商，讓您可以透過單一 API 存取數百個 AI 模型：

- **多種模型選項**：存取來自 OpenAI、Anthropic、Google、Meta、Mistral 等的模型
- **簡單設定**：只需新增您的 OpenRouter API 金鑰並選擇您想使用的模型
- **成本效益**：選擇符合您預算和效能需求的模型
- **輕鬆切換**：在不同模型之間切換，無需變更程式碼

### 如何在 DeepWiki 中使用 OpenRouter

1. **取得 API 金鑰**：在 [OpenRouter](https://openrouter.ai/) 註冊並取得您的 API 金鑰
2. **新增到環境**：在您的 `.env` 檔案中新增 `OPENROUTER_API_KEY=your_key`
3. **在 UI 中啟用**：在首頁勾選「使用 OpenRouter API」選項
4. **選擇模型**：從熱門模型中選擇，如 GPT-4o、Claude 3.5 Sonnet、Gemini 2.0 等

OpenRouter 特別適用於以下情況：
- 想嘗試不同模型而不用註冊多個服務
- 存取在您所在地區可能受限的模型
- 比較不同模型提供商的效能
- 根據您的需求最佳化成本與效能的平衡

## 🤖 提問和深度研究功能

### 提問功能

提問功能允許您使用檢索增強產生（RAG）與您的儲存庫聊天：

- **上下文感知回應**：基於儲存庫中實際程式碼取得準確答案
- **RAG 驅動**：系統檢索相關程式碼片段，提供有根據的回應
- **即時串流傳輸**：即時檢視產生的回應，取得更互動式的體驗
- **對話歷史**：系統在問題之間保持上下文，實現更連貫的互動

### 深度研究功能

深度研究透過多輪研究過程將儲存庫分析提升到新水平：

- **深入調查**：透過多次研究迭代徹底探索複雜主題
- **結構化過程**：遵循清晰的研究計畫，包含更新和全面結論
- **自動繼續**：AI 自動繼續研究直到達成結論（最多 5 次迭代）
- **研究階段**：
  1. **研究計畫**：概述方法和初步發現
  2. **研究更新**：在前一輪迭代基礎上增加新見解
  3. **最終結論**：基於所有迭代提供全面答案

要使用深度研究，只需在提交問題前在提問介面中切換「深度研究」開關。

## 📱 螢幕截圖

### 主頁面
![主頁面](screenshots/home.png)

### Wiki 頁面
![Wiki 頁面](screenshots/wiki-page.png)

### 提問功能
![提問功能](screenshots/ask.png)

### 深度研究
![深度研究](screenshots/deep-research.png)

### 展示影片

[![DeepWiki 展示影片](https://img.youtube.com/vi/zGANs8US8B4/0.jpg)](https://youtu.be/zGANs8US8B4)

*觀看 DeepWiki 實際操作！*

## 🔧 配置選項

### 模型提供商

DeepWiki 支援多個 AI 模型提供商：

1. **Google Gemini**（預設）
   - 快速且經濟實惠
   - 良好的程式碼理解能力

2. **OpenAI**
   - 高品質輸出
   - 支援 GPT-4 和 GPT-3.5

3. **OpenRouter**
   - 存取多個模型
   - 靈活的定價選項

4. **本機 Ollama**
   - 隱私保護
   - 離線執行
   - 需要本機設定

### Wiki 類型

- **全面型**：包含詳細分析、程式碼範例和完整文件
- **簡潔型**：專注於核心功能和關鍵見解

## 🌍 支援的平台

- **GitHub**：公開和私人儲存庫
- **GitLab**：GitLab.com 和自主託管實例
- **Bitbucket**：Atlassian 託管的儲存庫

## 📚 API 端點

### `/api/wiki_cache`
- **方法**：GET
- **描述**：檢索快取的 Wiki 資料
- **參數**：
  - `repo`: 儲存庫識別符
  - `platform`: git 平台（github、gitlab、bitbucket）

### `/export/wiki`
- **方法**：GET
- **描述**：匯出 Wiki 為 Markdown 或 JSON
- **參數**：
  - `repo`: 儲存庫識別符
  - `format`: 匯出格式（markdown、json）

## ❓ 故障排除

### API 金鑰問題
- **「缺少環境變數」**：確保您的 `.env` 檔案位於專案根目錄並包含所需的 API 金鑰
- **「API 金鑰無效」**：檢查您是否正確複製了完整金鑰，沒有多餘空格
- **「OpenRouter API 錯誤」**：驗證您的 OpenRouter API 金鑰有效且有足夠的額度

### 連線問題
- **「無法連線到 API 伺服器」**：確保 API 伺服器在連接埠 8001 上執行
- **「CORS 錯誤」**：API 設定為允許所有來源，但如果您遇到問題，請嘗試在同一台機器上執行前端和後端

### 產生問題
- **「產生 Wiki 時出錯」**：對於非常大的儲存庫，請先嘗試較小的儲存庫
- **「無效的儲存庫格式」**：確保您使用有效的 GitHub、GitLab 或 Bitbucket URL 格式
- **「無法擷取儲存庫結構」**：對於私人儲存庫，確保您輸入了具有適當權限的有效個人存取權杖
- **「圖表轉譯錯誤」**：應用程式將自動嘗試修復損壞的圖表

### 常見解決方案
1. **重新啟動兩個伺服器**：有時簡單的重新啟動可以解決大多數問題
2. **檢查主控台日誌**：開啟瀏覽器開發者工具查看任何 JavaScript 錯誤
3. **檢查 API 日誌**：查看執行 API 的終端中的 Python 錯誤

## 🤝 貢獻

我們歡迎各種形式的貢獻！無論是錯誤報告、功能請求還是程式碼貢獻。

### 開發設定

1. Fork 此儲存庫
2. 建立功能分支：`git checkout -b feature/amazing-feature`
3. 提交您的變更：`git commit -m 'Add amazing feature'`
4. 推送到分支：`git push origin feature/amazing-feature`
5. 開啟 Pull Request

### 新增新語言支援

1. 在 `src/messages/` 中新增新的翻譯檔案
2. 更新 `src/i18n.ts` 中的 `locales` 陣列
3. 建立相對應的 README 檔案
4. 測試翻譯

## 📄 授權

此專案根據 MIT 授權條款授權 - 詳情請參閱 [LICENSE](LICENSE) 檔案。

## 🙏 致謝

- 感謝所有貢獻者的努力
- 基於 Next.js、FastAPI 和各種開源程式庫建構
- 特別感謝 AI 模型提供商讓此專案成為可能

## 🐛 問題回報

如果您遇到任何問題，請在 GitHub Issues 中建立問題報告。請包含：

- 錯誤描述
- 重現步驟
- 預期行為
- 螢幕截圖（如果適用）
- 系統資訊

## 🔮 未來計劃

- [ ] 更多 AI 模型整合
- [ ] 進階程式碼分析功能
- [ ] 即時協作編輯
- [ ] 行動應用支援
- [ ] 企業級功能

## ⭐ Star 歷史

[![Star 歷史圖表](https://api.star-history.com/svg?repos=AsyncFuncAI/deepwiki-open&type=Date)](https://star-history.com/#AsyncFuncAI/deepwiki-open&Date)


================================================
FILE: README.zh.md
================================================
# DeepWiki-Open

![DeepWiki 横幅](screenshots/Deepwiki.png)

**DeepWiki**可以为任何GitHub、GitLab或BitBucket代码仓库自动创建美观、交互式的Wiki！只需输入仓库名称，DeepWiki将：

1. 分析代码结构
2. 生成全面的文档
3. 创建可视化图表解释一切如何运作
4. 将所有内容整理成易于导航的Wiki

[!["Buy Me A Coffee"](https://www.buymeacoffee.com/assets/img/custom_images/orange_img.png)](https://buymeacoffee.com/sheing)

[![Twitter/X](https://img.shields.io/badge/Twitter-1DA1F2?style=for-the-badge&logo=twitter&logoColor=white)](https://x.com/sashimikun_void)
[![Discord](https://img.shields.io/badge/Discord-7289DA?style=for-the-badge&logo=discord&logoColor=white)](https://discord.com/invite/VQMBGR8u5v)

[English](./README.md) | [简体中文](./README.zh.md) | [繁體中文](./README.zh-tw.md) | [日本語](./README.ja.md) | [Español](./README.es.md) | [한국어](./README.kr.md) | [Tiếng Việt](./README.vi.md) | [Português Brasileiro](./README.pt-br.md) | [Français](./README.fr.md) | [Русский](./README.ru.md)

## ✨ 特点

- **即时文档**：几秒钟内将任何GitHub、GitLab或BitBucket仓库转换为Wiki
- **私有仓库支持**：使用个人访问令牌安全访问私有仓库
- **智能分析**：AI驱动的代码结构和关系理解
- **精美图表**：自动生成Mermaid图表可视化架构和数据流
- **简易导航**：简单、直观的界面探索Wiki
- **提问功能**：使用RAG驱动的AI与您的仓库聊天，获取准确答案
- **深度研究**：多轮研究过程，彻底调查复杂主题
- **多模型提供商**：支持Google Gemini、OpenAI、OpenRouter和本地Ollama模型

## 🚀 快速开始（超级简单！）

### 选项1：使用Docker

```bash
# 克隆仓库
git clone https://github.com/AsyncFuncAI/deepwiki-open.git
cd deepwiki-open

# 创建包含API密钥的.env文件
echo "GOOGLE_API_KEY=your_google_api_key" > .env
echo "OPENAI_API_KEY=your_openai_api_key" >> .env
# 可选：如果您想使用OpenRouter模型，添加OpenRouter API密钥
echo "OPENROUTER_API_KEY=your_openrouter_api_key" >> .env

# 使用Docker Compose运行
docker-compose up
```

(上述 Docker 命令以及 `docker-compose.yml` 配置会挂载您主机上的 `~/.adalflow` 目录到容器内的 `/root/.adalflow`。此路径用于存储：
- 克隆的仓库 (`~/.adalflow/repos/`)
- 仓库的嵌入和索引 (`~/.adalflow/databases/`)
- 缓存的已生成 Wiki 内容 (`~/.adalflow/wikicache/`)

这确保了即使容器停止或移除，您的数据也能持久保存。)

> 💡 **获取这些密钥的地方：**
> - 从[Google AI Studio](https://makersuite.google.com/app/apikey)获取Google API密钥
> - 从[OpenAI Platform](https://platform.openai.com/api-keys)获取OpenAI API密钥

### 选项2：手动设置（推荐）

#### 步骤1：设置API密钥

在项目根目录创建一个`.env`文件，包含以下密钥：

```
GOOGLE_API_KEY=your_google_api_key
OPENAI_API_KEY=your_openai_api_key
# 可选：如果您想使用OpenRouter模型，添加此项
OPENROUTER_API_KEY=your_openrouter_api_key
```

#### 步骤2：启动后端

```bash
# 安装Python依赖
python -m pip install poetry==2.0.1 && poetry install -C api

# 启动API服务器
python -m api.main
```

#### 步骤3：启动前端

```bash
# 安装JavaScript依赖
npm install
# 或
yarn install

# 启动Web应用
npm run dev
# 或
yarn dev
```

#### 步骤4：使用DeepWiki！

1. 在浏览器中打开[http://localhost:3000](http://localhost:3000)
2. 输入GitHub、GitLab或Bitbucket仓库（如`https://github.com/openai/codex`、`https://github.com/microsoft/autogen`、`https://gitlab.com/gitlab-org/gitlab`或`https://bitbucket.org/redradish/atlassian_app_versions`）
3. 对于私有仓库，点击"+ 添加访问令牌"并输入您的GitHub或GitLab个人访问令牌
4. 点击"生成Wiki"，见证奇迹的发生！

## 🔍 工作原理

DeepWiki使用AI来：

1. 克隆并分析GitHub、GitLab或Bitbucket仓库（包括使用令牌认证的私有仓库）
2. 创建代码嵌入用于智能检索
3. 使用上下文感知AI生成文档（使用Google Gemini、OpenAI、OpenRouter或本地Ollama模型）
4. 创建可视化图表解释代码关系
5. 将所有内容组织成结构化Wiki
6. 通过提问功能实现与仓库的智能问答
7. 通过深度研究功能提供深入研究能力

```mermaid
graph TD
    A[用户输入GitHub/GitLab/Bitbucket仓库] --> AA{私有仓库?}
    AA -->|是| AB[添加访问令牌]
    AA -->|否| B[克隆仓库]
    AB --> B
    B --> C[分析代码结构]
    C --> D[创建代码嵌入]

    D --> M{选择模型提供商}
    M -->|Google Gemini| E1[使用Gemini生成]
    M -->|OpenAI| E2[使用OpenAI生成]
    M -->|OpenRouter| E3[使用OpenRouter生成]
    M -->|本地Ollama| E4[使用Ollama生成]

    E1 --> E[生成文档]
    E2 --> E
    E3 --> E
    E4 --> E

    D --> F[创建可视化图表]
    E --> G[组织为Wiki]
    F --> G
    G --> H[交互式DeepWiki]

    classDef process stroke-width:2px;
    classDef data stroke-width:2px;
    classDef result stroke-width:2px;
    classDef decision stroke-width:2px;

    class A,D data;
    class AA,M decision;
    class B,C,E,F,G,AB,E1,E2,E3,E4 process;
    class H result;
```

## 🛠️ 项目结构

```
deepwiki/
├── api/                  # 后端API服务器
│   ├── main.py           # API入口点
│   ├── api.py            # FastAPI实现
│   ├── rag.py            # 检索增强生成
│   ├── data_pipeline.py  # 数据处理工具
│   └── pyproject.toml    # Python依赖
│
├── src/                  # 前端Next.js应用
│   ├── app/              # Next.js应用目录
│   │   └── page.tsx      # 主应用页面
│   └── components/       # React组件
│       └── Mermaid.tsx   # Mermaid图表渲染器
│
├── public/               # 静态资源
├── package.json          # JavaScript依赖
└── .env                  # 环境变量（需要创建）
```

## 🤖 提问和深度研究功能

### 提问功能

提问功能允许您使用检索增强生成（RAG）与您的仓库聊天：

- **上下文感知响应**：基于仓库中实际代码获取准确答案
- **RAG驱动**：系统检索相关代码片段，提供有根据的响应
- **实时流式传输**：实时查看生成的响应，获得更交互式的体验
- **对话历史**：系统在问题之间保持上下文，实现更连贯的交互

### 深度研究功能

深度研究通过多轮研究过程将仓库分析提升到新水平：

- **深入调查**：通过多次研究迭代彻底探索复杂主题
- **结构化过程**：遵循清晰的研究计划，包含更新和全面结论
- **自动继续**：AI自动继续研究直到达成结论（最多5次迭代）
- **研究阶段**：
  1. **研究计划**：概述方法和初步发现
  2. **研究更新**：在前一轮迭代基础上增加新见解
  3. **最终结论**：基于所有迭代提供全面答案

要使用深度研究，只需在提交问题前在提问界面中切换"深度研究"开关。

## 📱 截图

![DeepWiki主界面](screenshots/Interface.png)
*DeepWiki的主界面*

![私有仓库支持](screenshots/privaterepo.png)
*使用个人访问令牌访问私有仓库*

![深度研究功能](screenshots/DeepResearch.png)
*深度研究为复杂主题进行多轮调查*

### 演示视频

[![DeepWiki演示视频](https://img.youtube.com/vi/zGANs8US8B4/0.jpg)](https://youtu.be/zGANs8US8B4)

*观看DeepWiki实际操作！*

## ❓ 故障排除

### API密钥问题
- **"缺少环境变量"**：确保您的`.env`文件位于项目根目录并包含所需的API密钥
- **"API密钥无效"**：检查您是否正确复制了完整密钥，没有多余空格
- **"OpenRouter API错误"**：验证您的OpenRouter API密钥有效且有足够的额度

### 连接问题
- **"无法连接到API服务器"**：确保API服务器在端口8001上运行
- **"CORS错误"**：API配置为允许所有来源，但如果您遇到问题，请尝试在同一台机器上运行前端和后端

### 生成问题
- **"生成Wiki时出错"**：对于非常大的仓库，请先尝试较小的仓库
- **"无效的仓库格式"**：确保您使用有效的GitHub、GitLab或Bitbucket URL格式
- **"无法获取仓库结构"**：对于私有仓库，确保您输入了具有适当权限的有效个人访问令牌
- **"图表渲染错误"**：应用程序将自动尝试修复损坏的图表

### 常见解决方案
1. **重启两个服务器**：有时简单的重启可以解决大多数问题
2. **检查控制台日志**：打开浏览器开发者工具查看任何JavaScript错误
3. **检查API日志**：查看运行API的终端中的Python错误

## 🤝 贡献

欢迎贡献！随时：
- 为bug或功能请求开issue
- 提交pull request改进代码
- 分享您的反馈和想法

## 📄 许可证

本项目根据MIT许可证授权 - 详情请参阅[LICENSE](LICENSE)文件。

## ⭐ 星标历史

[![星标历史图表](https://api.star-history.com/svg?repos=AsyncFuncAI/deepwiki-open&type=Date)](https://star-history.com/#AsyncFuncAI/deepwiki-open&Date)

## 🤖 基于提供者的模型选择系统

DeepWiki 现在实现了灵活的基于提供者的模型选择系统，支持多种 LLM 提供商：

### 支持的提供商和模型

- **Google**: 默认使用 `gemini-2.5-flash`，还支持 `gemini-2.5-flash-lite`、`gemini-2.5-pro` 等
- **OpenAI**: 默认使用 `gpt-5-nano`，还支持 `gpt-5`, `4o` 等
- **OpenRouter**: 通过统一 API 访问多种模型，包括 Claude、Llama、Mistral 等
- **Ollama**: 支持本地运行的开源模型，如 `llama3`

### 环境变量

每个提供商需要相应的 API 密钥环境变量：

```
# API 密钥
GOOGLE_API_KEY=你的谷歌API密钥        # 使用 Google Gemini 模型必需
OPENAI_API_KEY=你的OpenAI密钥        # 使用 OpenAI 模型必需
OPENROUTER_API_KEY=你的OpenRouter密钥 # 使用 OpenRouter 模型必需

# OpenAI API 基础 URL 配置
OPENAI_BASE_URL=https://自定义API端点.com/v1  # 可选，用于自定义 OpenAI API 端点
```

### 为服务提供者设计的自定义模型选择

自定义模型选择功能专为需要以下功能的服务提供者设计：

- 您可在您的组织内部为用户提供多种 AI 模型选择
- 您无需代码更改即可快速适应快速发展的 LLM 领域
- 您可支持预定义列表中没有的专业或微调模型

使用者可以通过从服务提供者预定义选项中选择或在前端界面中输入自定义模型标识符来实现其模型产品。

### 为企业私有渠道设计的基础 URL 配置

OpenAI 客户端的 base_url 配置主要为拥有私有 API 渠道的企业用户设计。此功能：

- 支持连接到私有或企业特定的 API 端点
- 允许组织使用自己的自托管或自定义部署的 LLM 服务
- 支持与第三方 OpenAI API 兼容服务的集成

**即将推出**：在未来的更新中，DeepWiki 将支持一种模式，用户需要在请求中提供自己的 API 密钥。这将允许拥有私有渠道的企业客户使用其现有的 API 安排，而不是与 DeepWiki 部署共享凭据。

### 环境变量

每个提供商需要其相应的API密钥环境变量：

```
# API密钥
GOOGLE_API_KEY=your_google_api_key        # Google Gemini模型必需
OPENAI_API_KEY=your_openai_api_key        # OpenAI模型必需
OPENROUTER_API_KEY=your_openrouter_api_key # OpenRouter模型必需

# OpenAI API基础URL配置
OPENAI_BASE_URL=https://custom-api-endpoint.com/v1  # 可选，用于自定义OpenAI API端点

# 配置目录
DEEPWIKI_CONFIG_DIR=/path/to/custom/config/dir  # 可选，用于自定义配置文件位置

# 授权模式
DEEPWIKI_AUTH_MODE=true  # 设置为 true 或 1 以启用授权模式
DEEPWIKI_AUTH_CODE=your_secret_code # 当 DEEPWIKI_AUTH_MODE 启用时所需的授权码
```
如果不使用ollama模式，那么需要配置OpenAI API密钥用于embeddings。其他密钥只有在配置并使用对应提供商的模型时才需要。

## 授权模式

DeepWiki 可以配置为在授权模式下运行，在该模式下，生成 Wiki 需要有效的授权码。如果您想控制谁可以使用生成功能，这将非常有用。
限制使用前端页面生成wiki并保护已生成页面的缓存删除，但无法完全阻止直接访问 API 端点生成wiki。主要目的是为了保护管理员已生成的wiki页面，防止被访问者重新生成。

要启用授权模式，请设置以下环境变量：

- `DEEPWIKI_AUTH_MODE`: 将此设置为 `true` 或 `1`。启用后，前端将显示一个用于输入授权码的字段。
- `DEEPWIKI_AUTH_CODE`: 将此设置为所需的密钥。限制使用前端页面生成wiki并保护已生成页面的缓存删除，但无法完全阻止直接访问 API 端点生成wiki。

如果未设置 `DEEPWIKI_AUTH_MODE` 或将其设置为 `false`（或除 `true`/`1` 之外的任何其他值），则授权功能将被禁用，并且不需要任何代码。

### 配置文件

DeepWiki使用JSON配置文件管理系统的各个方面：

1. **`generator.json`**：文本生成模型配置
   - 定义可用的模型提供商（Google、OpenAI、OpenRouter、Ollama）
   - 指定每个提供商的默认和可用模型
   - 包含特定模型的参数，如temperature和top_p

2. **`embedder.json`**：嵌入模型和文本处理配置
   - 定义用于向量存储的嵌入模型
   - 包含用于RAG的检索器配置
   - 指定文档分块的文本分割器设置

3. **`repo.json`**：仓库处理配置
   - 包含排除特定文件和目录的文件过滤器
   - 定义仓库大小限制和处理规则

默认情况下，这些文件位于`api/config/`目录中。您可以使用`DEEPWIKI_CONFIG_DIR`环境变量自定义它们的位置。

### 面向服务提供商的自定义模型选择

自定义模型选择功能专为需要以下功能的服务提供者设计：

- 您可在您的组织内部为用户提供多种 AI 模型选择
- 您无需代码更改即可快速适应快速发展的 LLM 领域
- 您可支持预定义列表中没有的专业或微调模型

使用者可以通过从服务提供者预定义选项中选择或在前端界面中输入自定义模型标识符来实现其模型产品。

### 为企业私有渠道设计的基础 URL 配置

OpenAI 客户端的 base_url 配置主要为拥有私有 API 渠道的企业用户设计。此功能：

- 支持连接到私有或企业特定的 API 端点
- 允许组织使用自己的自托管或自定义部署的 LLM 服务
- 支持与第三方 OpenAI API 兼容服务的集成

**即将推出**：在未来的更新中，DeepWiki 将支持一种模式，用户需要在请求中提供自己的 API 密钥。这将允许拥有私有渠道的企业客户使用其现有的 API 安排，而不是与 DeepWiki 部署共享凭据。

## 🧩 使用 OpenAI 兼容的 Embedding 模型（如阿里巴巴 Qwen）

如果你希望使用 OpenAI 以外、但兼容 OpenAI 接口的 embedding 模型（如阿里巴巴 Qwen），请参考以下步骤：

1. 用 `api/config/embedder.openai_compatible.json.bak` 的内容替换 `api/config/embedder.json`。
2. 在项目根目录的 `.env` 文件中，配置相应的环境变量，例如：
   ```
   OPENAI_API_KEY=你的_api_key
   OPENAI_BASE_URL=你的_openai_兼容接口地址
   ```
3. 程序会自动用环境变量的值替换 embedder.json 里的占位符。

这样即可无缝切换到 OpenAI 兼容的 embedding 服务，无需修改代码。


================================================
FILE: api/README.md
================================================
# 🚀 DeepWiki API

This is the backend API for DeepWiki, providing smart code analysis and AI-powered documentation generation.

## ✨ Features

- **Streaming AI Responses**: Real-time responses using Google's Generative AI (Gemini)
- **Smart Code Analysis**: Automatically analyzes GitHub repositories
- **RAG Implementation**: Retrieval Augmented Generation for context-aware responses
- **Local Storage**: All data stored locally - no cloud dependencies
- **Conversation History**: Maintains context across multiple questions

## 🔧 Quick Setup

### Step 1: Install Dependencies

```bash
# From the project root
python -m pip install poetry==2.0.1 && poetry install -C api
```

### Step 2: Set Up Environment Variables

Create a `.env` file in the project root:

```
# Required API Keys
GOOGLE_API_KEY=your_google_api_key        # Required for Google Gemini models
OPENAI_API_KEY=your_openai_api_key        # Required for embeddings and OpenAI models

# Optional API Keys
OPENROUTER_API_KEY=your_openrouter_api_key  # Required only if using OpenRouter models

# AWS Bedrock Configuration
AWS_ACCESS_KEY_ID=your_aws_access_key_id      # Required for AWS Bedrock models
AWS_SECRET_ACCESS_KEY=your_aws_secret_key     # Required for AWS Bedrock models
AWS_REGION=us-east-1                          # Optional, defaults to us-east-1
AWS_ROLE_ARN=your_aws_role_arn                # Optional, for role-based authentication

# OpenAI API Configuration
OPENAI_BASE_URL=https://custom-api-endpoint.com/v1  # Optional, for custom OpenAI API endpoints

# Ollama host
OLLAMA_HOST=https://your_ollama_host  # Optional: Add Ollama host if not local. default: http://localhost:11434

# Server Configuration
PORT=8001  # Optional, defaults to 8001
```

If you're not using Ollama mode, you need to configure an OpenAI API key for embeddings. Other API keys are only required when configuring and using models from the corresponding providers.

> 💡 **Where to get these keys:**
> - Get a Google API key from [Google AI Studio](https://makersuite.google.com/app/apikey)
> - Get an OpenAI API key from [OpenAI Platform](https://platform.openai.com/api-keys)
> - Get an OpenRouter API key from [OpenRouter](https://openrouter.ai/keys)
> - Get AWS credentials from [AWS IAM Console](https://console.aws.amazon.com/iam/)

#### Advanced Environment Configuration

##### Provider-Based Model Selection
DeepWiki supports multiple LLM providers. The environment variables above are required depending on which providers you want to use:

- **Google Gemini**: Requires `GOOGLE_API_KEY`
- **OpenAI**: Requires `OPENAI_API_KEY`
- **OpenRouter**: Requires `OPENROUTER_API_KEY`
- **AWS Bedrock**: Requires `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`
- **Ollama**: No API key required (runs locally)

##### Custom OpenAI API Endpoints
The `OPENAI_BASE_URL` variable allows you to specify a custom endpoint for the OpenAI API. This is useful for:

- Enterprise users with private API channels
- Organizations using self-hosted or custom-deployed LLM services
- Integration with third-party OpenAI API-compatible services

**Example:** you can use any endpoint that supports the OpenAI protocol, regardless of which organization provides it:
```
OPENAI_BASE_URL=https://custom-openai-endpoint.com/v1
```

##### Configuration Files
DeepWiki now uses JSON configuration files to manage various system components instead of hardcoded values:

1. **`generator.json`**: Configuration for text generation models
   - Located in `api/config/` by default
   - Defines available model providers (Google, OpenAI, OpenRouter, AWS Bedrock, Ollama)
   - Specifies default and available models for each provider
   - Contains model-specific parameters like temperature and top_p

2. **`embedder.json`**: Configuration for embedding models and text processing
   - Located in `api/config/` by default
   - Defines embedding models for vector storage
   - Contains retriever configuration for RAG
   - Specifies text splitter settings for document chunking

3. **`repo.json`**: Configuration for repository handling
   - Located in `api/config/` by default
   - Contains file filters to exclude certain files and directories
   - Defines repository size limits and processing rules

You can customize the configuration directory location using the environment variable:

```
DEEPWIKI_CONFIG_DIR=/path/to/custom/config/dir  # Optional, for custom config file location
```

This allows you to maintain different configurations for various environments or deployment scenarios without modifying the code.

### Step 3: Start the API Server

```bash
# From the project root
python -m api.main
```

The API will be available at `http://localhost:8001`

## 🧠 How It Works

### 1. Repository Indexing
When you provide a GitHub repository URL, the API:
- Clones the repository locally (if not already cloned)
- Reads all files in the repository
- Creates embeddings for the files using OpenAI
- Stores the embeddings in a local database

### 2. Smart Retrieval (RAG)
When you ask a question:
- The API finds the most relevant code snippets
- These snippets are used as context for the AI
- The AI generates a response based on this context

### 3. Real-Time Streaming
- Responses are streamed in real-time
- You see the answer as it's being generated
- This creates a more interactive experience

## 📡 API Endpoints

### GET /
Returns basic API information and available endpoints.

### POST /chat/completions/stream
Streams an AI-generated response about a GitHub repository.

**Request Body:**

```json
{
  "repo_url": "https://github.com/username/repo",
  "messages": [
    {
      "role": "user",
      "content": "What does this repository do?"
    }
  ],
  "filePath": "optional/path/to/file.py"  // Optional
}
```

**Response:**
A streaming response with the generated text.

## 📝 Example Code

```python
import requests

# API endpoint
url = "http://localhost:8001/chat/completions/stream"

# Request data
payload = {
    "repo_url": "https://github.com/AsyncFuncAI/deepwiki-open",
    "messages": [
        {
            "role": "user",
            "content": "Explain how React components work"
        }
    ]
}

# Make streaming request
response = requests.post(url, json=payload, stream=True)

# Process the streaming response
for chunk in response.iter_content(chunk_size=None):
    if chunk:
        print(chunk.decode('utf-8'), end='', flush=True)
```

## 💾 Storage

All data is stored locally on your machine:
- Cloned repositories: `~/.adalflow/repos/`
- Embeddings and indexes: `~/.adalflow/databases/`
- Generated wiki cache: `~/.adalflow/wikicache/`

No cloud storage is used - everything runs on your computer!


================================================
FILE: api/__init__.py
================================================
# Make the api package importable

# api package


================================================
FILE: api/api.py
================================================
import os
import logging
from fastapi import FastAPI, HTTPException, Query, Request, WebSocket
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, Response
from typing import List, Optional, Dict, Any, Literal
import json
from datetime import datetime
from pydantic import BaseModel, Field
import google.generativeai as genai
import asyncio

# Configure logging
from api.logging_config import setup_logging

# Apply the project's logging setup first, then create this module's logger.
setup_logging()
logger = logging.getLogger(__name__)


# Initialize FastAPI app
# Every route decorator and add_*_route call in this module registers its
# handler on this `app` object.
app = FastAPI(
    title="Streaming API",
    description="API for streaming chat completions"
)

# Configure CORS
# NOTE(review): all origins, methods and headers are allowed while
# allow_credentials=True. Confirm this permissive policy is intended for
# deployments that untrusted browsers can reach.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allows all origins
    allow_credentials=True,
    allow_methods=["*"],  # Allows all methods
    allow_headers=["*"],  # Allows all headers
)

# Helper function to get adalflow root path
def get_adalflow_default_root_path():
    """Return the `.adalflow` directory path inside the current user's home directory."""
    home_relative = os.path.join("~", ".adalflow")
    return os.path.expanduser(home_relative)

# --- Pydantic Models ---
class WikiPage(BaseModel):
    """
    Model for a wiki page.
    """
    # Identifier of the page. The Markdown export uses it as the anchor name,
    # and `relatedPages` entries of other pages are matched against it.
    id: str
    # Heading text used for the page in the Markdown export.
    title: str
    # Page body; the Markdown export emits it unchanged.
    content: str
    # NOTE(review): presumably the repository files this page was generated
    # from - confirm with the producer of these objects.
    filePaths: List[str]
    importance: str # Should ideally be Literal['high', 'medium', 'low']
    # Ids of other pages related to this one (looked up via `WikiPage.id`).
    relatedPages: List[str]

class ProcessedProjectEntry(BaseModel):
    # Summary record for one processed project; every field is required.
    # NOTE(review): not referenced in the part of the module reviewed here -
    # confirm which endpoint builds these entries.
    id: str  # Filename
    owner: str
    repo: str
    name: str  # owner/repo
    repo_type: str # Renamed from type to repo_type for clarity with existing models
    # NOTE(review): unit of the timestamp (seconds vs. milliseconds) is not
    # established here - confirm with the producer.
    submittedAt: int # Timestamp
    language: str # Extracted from filename

class RepoInfo(BaseModel):
    # Repository descriptor embedded in WikiCacheData and WikiCacheRequest.
    # `owner`, `repo` and `type` are the values save_wiki_cache passes to
    # get_wiki_cache_path to build the cache file name.
    owner: str
    repo: str
    type: str
    # NOTE(review): presumably an access token for private repositories.
    # save_wiki_cache stores the whole RepoInfo, so a token set here ends up
    # in the cache file - confirm that is intended.
    token: Optional[str] = None
    # Optional location fields; both default to None.
    localPath: Optional[str] = None
    repoUrl: Optional[str] = None


class WikiSection(BaseModel):
    """
    Model for the wiki sections.
    """
    id: str
    title: str
    # NOTE(review): presumably WikiPage ids belonging to this section - confirm.
    pages: List[str]
    # NOTE(review): presumably ids of nested WikiSection objects - confirm.
    # Optional; defaults to None.
    subsections: Optional[List[str]] = None


class WikiStructureModel(BaseModel):
    """
    Model for the overall wiki structure.
    """
    id: str
    title: str
    description: str
    # Full page objects that make up the wiki.
    pages: List[WikiPage]
    # Section grouping is optional; both fields default to None.
    sections: Optional[List[WikiSection]] = None
    # NOTE(review): presumably ids of the top-level sections - confirm.
    rootSections: Optional[List[str]] = None

class WikiCacheData(BaseModel):
    """
    Model for the data to be stored in the wiki cache.
    """
    # Only the structure and the generated pages are required; the remaining
    # fields default to None so that cache files lacking them still load.
    wiki_structure: WikiStructureModel
    # Generated pages keyed by string (NOTE(review): presumably the page id -
    # confirm with the frontend that builds this mapping).
    generated_pages: Dict[str, WikiPage]
    repo_url: Optional[str] = None  #compatible for old cache
    repo: Optional[RepoInfo] = None
    # Provider and model names recorded alongside the cached wiki.
    provider: Optional[str] = None
    model: Optional[str] = None

class WikiCacheRequest(BaseModel):
    """
    Model for the request body when saving wiki cache.
    """
    # All fields are required. save_wiki_cache copies everything except
    # `language` into a WikiCacheData payload; `language` is used only to
    # select the cache file.
    repo: RepoInfo
    language: str
    wiki_structure: WikiStructureModel
    generated_pages: Dict[str, WikiPage]
    provider: str
    model: str

class WikiExportRequest(BaseModel):
    """
    Model for requesting a wiki export.
    """
    # export_wiki derives the download file name from the last path segment
    # of this URL.
    repo_url: str = Field(..., description="URL of the repository")
    pages: List[WikiPage] = Field(..., description="List of wiki pages to export")
    # Restricted to these two literals; export_wiki treats anything that is
    # not "markdown" as JSON.
    format: Literal["markdown", "json"] = Field(..., description="Export format (markdown or json)")

# --- Model Configuration Models ---
class Model(BaseModel):
    """
    Model for LLM model configuration
    """
    # get_model_config fills both fields with the same model id taken from
    # the provider configuration.
    id: str = Field(..., description="Model identifier")
    name: str = Field(..., description="Display name for the model")

class Provider(BaseModel):
    """
    Model for LLM provider configuration
    """
    id: str = Field(..., description="Provider identifier")
    # get_model_config sets this to the capitalized provider id.
    name: str = Field(..., description="Display name for the provider")
    models: List[Model] = Field(..., description="List of available models for this provider")
    # Defaults to False when the provider configuration does not set it.
    supportsCustomModel: Optional[bool] = Field(False, description="Whether this provider supports custom models")

class ModelConfig(BaseModel):
    """
    Model for the entire model configuration
    """
    # Response shape of GET /models/config.
    providers: List[Provider] = Field(..., description="List of available model providers")
    defaultProvider: str = Field(..., description="ID of the default provider")

class AuthorizationConfig(BaseModel):
    # Request body of POST /auth/validate; `code` is compared with
    # WIKI_AUTH_CODE.
    code: str = Field(..., description="Authorization code")

from api.config import configs, WIKI_AUTH_MODE, WIKI_AUTH_CODE

@app.get("/lang/config")
async def get_lang_config():
    """Return the `lang_config` section of the loaded configuration."""
    language_settings = configs["lang_config"]
    return language_settings

@app.get("/auth/status")
async def get_auth_status():
    """
    Report whether the wiki requires an authorization code.
    """
    status = dict(auth_required=WIKI_AUTH_MODE)
    return status

@app.post("/auth/validate")
async def validate_auth_code(request: AuthorizationConfig):
    """
    Check a submitted authorization code against the configured one.

    Args:
        request: Request body carrying the candidate ``code``.

    Returns:
        ``{"success": True}`` when the code equals ``WIKI_AUTH_CODE``,
        otherwise ``{"success": False}``.
    """
    # Imported locally because the module's import block does not provide hmac.
    import hmac

    expected_code = WIKI_AUTH_CODE
    if not isinstance(expected_code, str):
        # Nothing comparable is configured, so no submitted string can match
        # (same outcome as a plain equality check against a non-string).
        return {"success": False}

    # Constant-time comparison so response timing does not reveal how many
    # leading characters of a guess are correct. Both sides are encoded to
    # bytes because compare_digest rejects non-ASCII str arguments.
    is_match = hmac.compare_digest(
        expected_code.encode("utf-8", "surrogatepass"),
        request.code.encode("utf-8", "surrogatepass"),
    )
    return {"success": is_match}

@app.get("/models/config", response_model=ModelConfig)
async def get_model_config():
    """
    List the configured model providers together with their models.

    The provider table is read from the loaded configuration. Each provider's
    display name is its capitalized id, and each model's display name is its id.

    Returns:
        ModelConfig: The providers and the id of the default provider. If
        building the configuration raises, a single-provider Google fallback
        is returned instead.
    """
    try:
        logger.info("Fetching model configurations")

        default_provider = configs.get("default_provider", "google")

        def _to_provider(provider_id, provider_config):
            # One Model per configured model id; the id doubles as its name.
            available = [
                Model(id=model_id, name=model_id)
                for model_id in provider_config["models"].keys()
            ]
            return Provider(
                id=provider_id,
                name=provider_id.capitalize(),
                supportsCustomModel=provider_config.get("supportsCustomModel", False),
                models=available
            )

        provider_entries = [
            _to_provider(provider_id, provider_config)
            for provider_id, provider_config in configs["providers"].items()
        ]

        return ModelConfig(
            providers=provider_entries,
            defaultProvider=default_provider
        )

    except Exception as e:
        logger.error(f"Error creating model configuration: {str(e)}")
        # Fall back to a minimal built-in configuration so the caller still
        # receives a usable response.
        fallback_model = Model(id="gemini-2.5-flash", name="Gemini 2.5 Flash")
        fallback_provider = Provider(
            id="google",
            name="Google",
            supportsCustomModel=True,
            models=[fallback_model]
        )
        return ModelConfig(
            providers=[fallback_provider],
            defaultProvider="google"
        )

@app.post("/export/wiki")
async def export_wiki(request: WikiExportRequest):
    """
    Export wiki content as Markdown or JSON.

    Args:
        request: The export request containing the repository URL, the wiki
            pages and the desired format.

    Returns:
        A downloadable file (``Content-Disposition: attachment``) in the
        requested format.

    Raises:
        HTTPException: With status 500 when generating the export fails.
    """
    try:
        logger.info(f"Exporting wiki for {request.repo_url} in {request.format} format")

        # Use the last path segment of the URL as the base of the file name.
        last_segment = request.repo_url.rstrip('/').split('/')[-1]
        # The name goes into a response header, so keep only ASCII letters,
        # digits, '.', '_' and '-'. Ordinary repository names pass through
        # unchanged.
        safe_segment = "".join(
            ch if (ch.isascii() and (ch.isalnum() or ch in "._-")) else "_"
            for ch in last_segment
        )
        # split() always yields at least one element, so test for emptiness
        # here to make the "wiki" fallback reachable (e.g. repo_url "" or "/").
        repo_name = safe_segment or "wiki"

        # Get current timestamp for the filename
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        if request.format == "markdown":
            # Generate Markdown content
            content = generate_markdown_export(request.repo_url, request.pages)
            filename = f"{repo_name}_wiki_{timestamp}.md"
            media_type = "text/markdown"
        else:  # JSON format
            # Generate JSON content
            content = generate_json_export(request.repo_url, request.pages)
            filename = f"{repo_name}_wiki_{timestamp}.json"
            media_type = "application/json"

        # Create response with appropriate headers for file download
        response = Response(
            content=content,
            media_type=media_type,
            headers={
                "Content-Disposition": f"attachment; filename={filename}"
            }
        )

        return response

    except Exception as e:
        error_msg = f"Error exporting wiki: {str(e)}"
        logger.error(error_msg)
        raise HTTPException(status_code=500, detail=error_msg)

@app.get("/local_repo/structure")
async def get_local_repo_structure(path: str = Query(None, description="Path to local repository")):
    """
    Walk a local directory and report its file listing plus README text.

    Returns a JSON error with status 400 when no path is given, 404 when the
    path is not a directory, and 500 when walking the tree raises. On success
    the result has `file_tree` (sorted relative paths, one per line) and
    `readme` (content of the first README.md that could be read, else "").
    """
    if not path:
        return JSONResponse(
            status_code=400,
            content={"error": "No path provided. Please provide a 'path' query parameter."}
        )

    if not os.path.isdir(path):
        return JSONResponse(
            status_code=404,
            content={"error": f"Directory not found: {path}"}
        )

    # Directory names pruned from the walk in addition to dot-prefixed ones.
    pruned_dir_names = ('__pycache__', 'node_modules')

    try:
        logger.info(f"Processing local repository at: {path}")
        relative_paths = []
        readme_content = ""

        for current_dir, subdirs, filenames in os.walk(path):
            # Assign in place so os.walk does not descend into pruned directories.
            subdirs[:] = [
                name for name in subdirs
                if not (name.startswith('.') or name in pruned_dir_names)
            ]
            relative_dir = os.path.relpath(current_dir, path)

            for filename in filenames:
                # Hidden files and package markers are left out of the listing.
                if filename.startswith('.') or filename == '__init__.py':
                    continue

                if relative_dir == '.':
                    relative_paths.append(filename)
                else:
                    relative_paths.append(os.path.join(relative_dir, filename))

                # Keep the first README.md (any letter case) that can be read.
                if readme_content or filename.lower() != 'readme.md':
                    continue
                try:
                    with open(os.path.join(current_dir, filename), 'r', encoding='utf-8') as readme_file:
                        readme_content = readme_file.read()
                except Exception as e:
                    logger.warning(f"Could not read README.md: {str(e)}")
                    readme_content = ""

        return {"file_tree": '\n'.join(sorted(relative_paths)), "readme": readme_content}
    except Exception as e:
        logger.error(f"Error processing local repository: {str(e)}")
        return JSONResponse(
            status_code=500,
            content={"error": f"Error processing local repository: {str(e)}"}
        )

def generate_markdown_export(repo_url: str, pages: List[WikiPage]) -> str:
    """
    Generate Markdown export of wiki pages.

    The document consists of a title and generation timestamp, a table of
    contents linking to every page, and then each page as an anchored section.
    A page's "Related Pages" block is emitted only when at least one of its
    related ids refers to a page in ``pages``.

    Args:
        repo_url: The repository URL
        pages: List of wiki pages

    Returns:
        Markdown content as string
    """
    # Map each page id to its title once instead of scanning `pages` for
    # every related id. setdefault keeps the first page when ids repeat.
    title_by_id = {}
    for page in pages:
        title_by_id.setdefault(page.id, page.title)

    # Collect fragments and join once at the end.
    parts = [
        f"# Wiki Documentation for {repo_url}\n\n",
        f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n",
        "## Table of Contents\n\n",
    ]
    parts.extend(f"- [{page.title}](#{page.id})\n" for page in pages)
    parts.append("\n")

    for page in pages:
        # Explicit anchor so the table of contents and related links resolve.
        parts.append(f"<a id='{page.id}'></a>\n\n")
        parts.append(f"## {page.title}\n\n")

        # Link only to related pages that are part of this export.
        related_links = [
            f"[{title_by_id[related_id]}](#{related_id})"
            for related_id in (page.relatedPages or [])
            if related_id in title_by_id
        ]
        if related_links:
            # Emitting the heading only with resolved links avoids an empty
            # "Related Pages" section.
            parts.append("### Related Pages\n\n")
            parts.append("Related topics: " + ", ".join(related_links) + "\n\n")

        parts.append(f"{page.content}\n\n")
        parts.append("---\n\n")

    return "".join(parts)

def generate_json_export(repo_url: str, pages: List[WikiPage]) -> str:
    """
    Serialize wiki pages and export metadata to a JSON document.

    Args:
        repo_url: The repository URL, recorded in the metadata
        pages: Wiki pages to include; each is dumped with ``model_dump()``

    Returns:
        JSON text indented by two spaces, with a ``metadata`` object
        (repository, generated_at, page_count) followed by ``pages``
    """
    metadata = {
        "repository": repo_url,
        "generated_at": datetime.now().isoformat(),
        "page_count": len(pages),
    }

    dumped_pages = []
    for page in pages:
        dumped_pages.append(page.model_dump())

    document = {"metadata": metadata, "pages": dumped_pages}
    return json.dumps(document, indent=2)

# Import the simplified chat implementation
from api.simple_chat import chat_completions_stream
from api.websocket_wiki import handle_websocket_chat

# Add the chat_completions_stream endpoint to the main app
# (handler imported from api.simple_chat; registered for POST only)
app.add_api_route("/chat/completions/stream", chat_completions_stream, methods=["POST"])

# Add the WebSocket endpoint
# (handler imported from api.websocket_wiki)
app.add_websocket_route("/ws/chat", handle_websocket_chat)

# --- Wiki Cache Helper Functions ---

# Wiki cache files live in the "wikicache" directory under the adalflow root.
WIKI_CACHE_DIR = os.path.join(get_adalflow_default_root_path(), "wikicache")
# Created when this module is imported; exist_ok makes it a no-op when the
# directory is already present.
os.makedirs(WIKI_CACHE_DIR, exist_ok=True)

def get_wiki_cache_path(owner: str, repo: str, repo_type: str, language: str) -> str:
    """
    Generates the file path for a given wiki cache.

    The file name is ``deepwiki_cache_{repo_type}_{owner}_{repo}_{language}.json``
    inside ``WIKI_CACHE_DIR``.

    Args:
        owner: Repository owner
        repo: Repository name
        repo_type: Repository type (e.g. github, gitlab)
        language: Language of the wiki content

    Returns:
        Path of the cache file for this combination
    """
    def _flatten(component) -> str:
        # The components arrive from request parameters. Replacing path
        # separators keeps the result a single file name directly inside
        # WIKI_CACHE_DIR. Values without separators are unchanged.
        return str(component).replace("/", "_").replace("\\", "_")

    filename = (
        f"deepwiki_cache_{_flatten(repo_type)}_{_flatten(owner)}"
        f"_{_flatten(repo)}_{_flatten(language)}.json"
    )
    return os.path.join(WIKI_CACHE_DIR, filename)

async def read_wiki_cache(owner: str, repo: str, repo_type: str, language: str) -> Optional[WikiCacheData]:
    """
    Load the cached wiki for a repository/language from disk.

    Returns the parsed WikiCacheData, or None when the cache file does not
    exist or cannot be read or validated (the error is logged).
    """
    cache_path = get_wiki_cache_path(owner, repo, repo_type, language)
    if not os.path.exists(cache_path):
        return None

    try:
        with open(cache_path, 'r', encoding='utf-8') as cache_file:
            raw_cache = json.load(cache_file)
        return WikiCacheData(**raw_cache)
    except Exception as e:
        logger.error(f"Error reading wiki cache from {cache_path}: {e}")
        return None

async def save_wiki_cache(data: WikiCacheRequest) -> bool:
    """
    Write the wiki cache for a repository/language to disk.

    Returns True when the cache file was written, False when building or
    writing the payload failed (the error is logged with its traceback).
    """
    repo_info = data.repo
    cache_path = get_wiki_cache_path(repo_info.owner, repo_info.repo, repo_info.type, data.language)
    logger.info(f"Attempting to save wiki cache. Path: {cache_path}")
    try:
        # NOTE(review): the payload embeds `data.repo` unchanged, including
        # its optional `token` field - confirm that persisting it is intended.
        payload = WikiCacheData(
            wiki_structure=data.wiki_structure,
            generated_pages=data.generated_pages,
            repo=repo_info,
            provider=data.provider,
            model=data.model
        )

        # Log only the serialized size; the content itself may be large.
        # A failure here is not fatal for the save.
        try:
            size_in_bytes = len(payload.model_dump_json().encode('utf-8'))
            logger.info(f"Payload prepared for caching. Size: {size_in_bytes} bytes.")
        except Exception as ser_e:
            logger.warning(f"Could not serialize payload for size logging: {ser_e}")

        logger.info(f"Writing cache file to: {cache_path}")
        with open(cache_path, 'w', encoding='utf-8') as cache_file:
            json.dump(payload.model_dump(), cache_file, indent=2)
    except IOError as e:
        logger.error(f"IOError saving wiki cache to {cache_path}: {e.strerror} (errno: {e.errno})", exc_info=True)
        return False
    except Exception as e:
        logger.error(f"Unexpected error saving wiki cache to {cache_path}: {e}", exc_info=True)
        return False
    else:
        logger.info(f"Wiki cache successfully saved to {cache_path}")
        return True

# --- Wiki Cache API Endpoints ---

@app.get("/api/wiki_cache", response_model=Optional[WikiCacheData])
async def get_cached_wiki(
    owner: str = Query(..., description="Repository owner"),
    repo: str = Query(..., description="Repository name"),
    repo_type: str = Query(..., description="Repository type (e.g., github, gitlab)"),
    language: str = Query(..., description="Language of the wiki content")
):
    """
    Return the cached wiki (structure and generated pages) for a repository.

    An unsupported language falls back to the configured default language.
    A missing cache yields a null body with status 200.
    """
    # Language validation: unknown languages are mapped to the default.
    if language not in configs["lang_config"]["supported_languages"]:
        language = configs["lang_config"]["default"]

    logger.info(f"Attempting to retrieve wiki cache for {owner}/{repo} ({repo_type}), lang: {language}")
    cached_data = await read_wiki_cache(owner, repo, repo_type, language)
    if not cached_data:
        # Respond 200 with a null body rather than 404; the frontend expects
        # this behavior.
        logger.info(f"Wiki cache not found for {owner}/{repo} ({repo_type}), lang: {language}")
        return None
    return cached_data

@app.post("/api/wiki_cache")
async def store_wiki_cache(request_data: WikiCacheRequest):
    """
    Persist generated wiki data (structure and pages) in the server-side cache.

    An unsupported language is replaced by the configured default before
    saving. Raises HTTPException with status 500 when the save fails.
    """
    # Language validation: unknown languages are mapped to the default.
    if request_data.language not in configs["lang_config"]["supported_languages"]:
        request_data.language = configs["lang_config"]["default"]

    logger.info(f"Attempting to save wiki cache for {request_data.repo.owner}/{request_data.repo.repo} ({request_data.repo.type}), lang: {request_data.language}")
    saved = await save_wiki_cache(request_data)
    if not saved:
        raise HTTPException(status_code=500, detail="Failed to save wiki cache")
    return {"message": "Wiki cache saved successfully"}

@app.delete("/api/wiki_cache")
async def delete_wiki_cache(
    owner: str = Query(..., description="Repository owner"),
    repo: str = Query(..., description="Repository name"),
    repo_type: str = Query(..., description="Repository type (e.g., github, gitlab)"),
    language: str = Query(..., description="Language of the wiki content"),
    authorization_code: Optional[str] = Query(None, description="Authorization code")
):
    """
    Deletes a specific wiki cache from the file system.

    Raises:
        HTTPException 400: ``language`` is not one of the supported languages.
        HTTPException 401: auth mode is enabled and the authorization code is
            missing or does not match.
        HTTPException 404: no cache file exists for the given parameters.
        HTTPException 500: the cache file exists but could not be removed.
    """
    # Imported locally so this block does not depend on the file's import header.
    import hmac

    # Language validation
    supported_langs = configs["lang_config"]["supported_languages"]
    if language not in supported_langs:
        raise HTTPException(status_code=400, detail="Language is not supported")

    if WIKI_AUTH_MODE:
        logger.info("check the authorization code")
        # Compare as bytes in constant time: the code arrives in a query string
        # from an untrusted client, and compare_digest rejects non-ASCII str.
        supplied = (authorization_code or "").encode("utf-8")
        expected = (WIKI_AUTH_CODE or "").encode("utf-8")
        if not authorization_code or not hmac.compare_digest(supplied, expected):
            raise HTTPException(status_code=401, detail="Authorization code is invalid")

    logger.info(f"Attempting to delete wiki cache for {owner}/{repo} ({repo_type}), lang: {language}")
    cache_path = get_wiki_cache_path(owner, repo, repo_type, language)

    # Remove directly and react to FileNotFoundError instead of checking
    # os.path.exists first, so a concurrent delete yields 404 rather than 500.
    try:
        os.remove(cache_path)
    except FileNotFoundError:
        logger.warning(f"Wiki cache not found, cannot delete: {cache_path}")
        raise HTTPException(status_code=404, detail="Wiki cache not found")
    except Exception as e:
        logger.error(f"Error deleting wiki cache {cache_path}: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to delete wiki cache: {str(e)}")

    logger.info(f"Successfully deleted wiki cache: {cache_path}")
    return {"message": f"Wiki cache for {owner}/{repo} ({language}) deleted successfully"}

@app.get("/health")
async def health_check():
    """Health check endpoint for Docker and monitoring.

    Reports a fixed "healthy" status, the service name, and the current
    local time in ISO 8601 format.
    """
    current_time = datetime.now().isoformat()
    payload = {
        "status": "healthy",
        "timestamp": current_time,
        "service": "deepwiki-api"
    }
    return payload

@app.get("/")
async def root():
    """Root endpoint to check if the API is running and list available endpoints dynamically.

    Endpoints are grouped by the capitalized first path segment ("Root" for
    the bare "/" path); each entry is formatted as "<METHOD> <path>" and the
    entries of every group are sorted.
    """
    # Docs and static routes that should not be advertised.
    skipped_paths = ("/openapi.json", "/docs", "/redoc", "/favicon.ico")
    # Methods that are not listed explicitly.
    hidden_methods = {"HEAD", "OPTIONS"}

    endpoints = {}
    for route in app.routes:
        # Only routes exposing both HTTP methods and a path can be listed.
        if not (hasattr(route, "methods") and hasattr(route, "path")):
            continue
        if route.path in skipped_paths:
            continue

        first_segment = route.path.strip("/").split("/")[0]
        group = first_segment.capitalize() if first_segment else "Root"
        for method in list(route.methods - hidden_methods):
            endpoints.setdefault(group, []).append(f"{method} {route.path}")

    # Sort each group's entries for readability.
    for entries in endpoints.values():
        entries.sort()

    return {
        "message": "Welcome to Streaming API",
        "version": "1.0.0",
        "endpoints": endpoints
    }

# --- Processed Projects Endpoint --- (New Endpoint)
@app.get("/api/processed_projects", response_model=List[ProcessedProjectEntry])
async def get_processed_projects():
    """
    Lists all processed projects found in the wiki cache directory.
    Projects are identified by files named like: deepwiki_cache_{repo_type}_{owner}_{repo}_{language}.json

    The first two underscore-separated fields of the file name are read as
    repo_type and owner, the last one as language, and everything in between
    as the repo name (so repo names may contain underscores). Files whose name
    cannot be parsed, or that fail to stat, are logged and skipped.

    Returns:
        Entries sorted by file modification time, most recent first. An empty
        list when the cache directory does not exist.

    Raises:
        HTTPException 500: the cache directory could not be listed.
    """
    cache_prefix = "deepwiki_cache_"
    cache_suffix = ".json"
    project_entries: List[ProcessedProjectEntry] = []
    # WIKI_CACHE_DIR is already defined globally in the file

    try:
        if not os.path.exists(WIKI_CACHE_DIR):
            logger.info(f"Cache directory {WIKI_CACHE_DIR} not found. Returning empty list.")
            return []

        logger.info(f"Scanning for project cache files in: {WIKI_CACHE_DIR}")
        filenames = await asyncio.to_thread(os.listdir, WIKI_CACHE_DIR) # Use asyncio.to_thread for os.listdir

        for filename in filenames:
            if not (filename.startswith(cache_prefix) and filename.endswith(cache_suffix)):
                continue

            file_path = os.path.join(WIKI_CACHE_DIR, filename)
            try:
                stats = await asyncio.to_thread(os.stat, file_path) # Use asyncio.to_thread for os.stat
                # Strip only the leading prefix and trailing suffix. str.replace
                # would also remove these substrings from inside the repo name
                # (e.g. a repo called "config.json-tools").
                stem = filename[len(cache_prefix):-len(cache_suffix)]
                parts = stem.split('_')

                # Expecting repo_type_owner_repo_language
                # Example: deepwiki_cache_github_AsyncFuncAI_deepwiki-open_en.json
                # parts = [github, AsyncFuncAI, deepwiki-open, en]
                if len(parts) < 4:
                    logger.warning(f"Could not parse project details from filename: {filename}")
                    continue

                repo_type = parts[0]
                owner = parts[1]
                language = parts[-1] # language is the last part
                repo = "_".join(parts[2:-1]) # repo can contain underscores

                project_entries.append(
                    ProcessedProjectEntry(
                        id=filename,
                        owner=owner,
                        repo=repo,
                        name=f"{owner}/{repo}",
                        repo_type=repo_type,
                        submittedAt=int(stats.st_mtime * 1000), # Convert to milliseconds
                        language=language
                    )
                )
            except Exception as e:
                logger.error(f"Error processing file {file_path}: {e}")
                continue # Skip this file on error

        # Sort by most recent first
        project_entries.sort(key=lambda p: p.submittedAt, reverse=True)
        logger.info(f"Found {len(project_entries)} processed project entries.")
        return project_entries

    except Exception as e:
        logger.error(f"Error listing processed projects from {WIKI_CACHE_DIR}: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail="Failed to list processed projects from server cache.")


================================================
FILE: api/azureai_client.py
================================================
"""AzureOpenAI ModelClient integration."""

import os
from typing import (
    Dict,
    Sequence,
    Optional,
    List,
    Any,
    TypeVar,
    Callable,
    Generator,
    Union,
    Literal,
)
import re

import logging
import backoff

# optional import
from adalflow.utils.lazy_import import safe_import, OptionalPackages

import sys

openai = safe_import(OptionalPackages.OPENAI.value[0], OptionalPackages.OPENAI.value[1])
# Importing all Azure packages together
azure_modules = safe_import(
    OptionalPackages.AZURE.value[0],  # List of package names
    OptionalPackages.AZURE.value[1],  # Error message
)
# Manually add each module to sys.modules to make them available globally as if imported normally
azure_module_names = OptionalPackages.AZURE.value[0]
for name, module in zip(azure_module_names, azure_modules):
    sys.modules[name] = module

# Use the modules as if they were imported normally
from azure.identity import DefaultAzureCredential, get_bearer_token_provider

# from azure.core.credentials import AccessToken
from openai import AzureOpenAI, AsyncAzureOpenAI, Stream
from openai import (
    APITimeoutError,
    InternalServerError,
    RateLimitError,
    UnprocessableEntityError,
    BadRequestError,
)
from openai.types import (
    Completion,
    CreateEmbeddingResponse,
)
from openai.types.chat import ChatCompletionChunk, ChatCompletion

from adalflow.core.model_client import ModelClient
from adalflow.core.types import (
    ModelType,
    EmbedderOutput,
    TokenLogProb,
    CompletionUsage,
    GeneratorOutput,
)
from adalflow.components.model_client.utils import parse_embedding_response

log = logging.getLogger(__name__)
T = TypeVar("T")


__all__ = ["AzureAIClient"]

# TODO: this overlaps with openai client largely, might need to refactor to subclass openai client to simplify the code


# Completion parsing functions; you can combine them into one single chat completion parser.
def get_first_message_content(completion: ChatCompletion) -> str:
    r"""Return the message content of the first choice in ``completion``.

    This is the parser ``AzureAIClient`` uses when no
    ``chat_completion_parser`` is supplied."""
    first_choice = completion.choices[0]
    return first_choice.message.content


# def _get_chat_completion_usage(completion: ChatCompletion) -> OpenAICompletionUsage:
#     return completion.usage


def parse_stream_response(completion: ChatCompletionChunk) -> Optional[str]:
    r"""Parse one chunk of the stream API and return its text delta.

    Args:
        completion: A single streamed chat completion chunk.

    Returns:
        The ``delta.content`` of the chunk's first choice, which may itself be
        ``None``. ``None`` is also returned for a chunk whose ``choices`` list
        is empty, so such chunks no longer abort the stream with ``IndexError``.
    """
    choices = completion.choices
    if not choices:
        # Some chunks carry metadata only and have no choices to read from.
        return None
    return choices[0].delta.content


def handle_streaming_response(generator: Stream[ChatCompletionChunk]):
    r"""Lazily turn a stream of chunks into a stream of parsed contents.

    Each raw chunk is logged at debug level and then passed through
    ``parse_stream_response``; the parsed value is yielded as-is."""
    for chunk in generator:
        log.debug(f"Raw chunk completion: {chunk}")
        yield parse_stream_response(chunk)


def get_all_messages_content(completion: ChatCompletion) -> List[str]:
    r"""Collect the message content of every choice (useful when n > 1).

    The result keeps the order of ``completion.choices``."""
    contents = []
    for choice in completion.choices:
        contents.append(choice.message.content)
    return contents


def get_probabilities(completion: ChatCompletion) -> List[List[TokenLogProb]]:
    r"""Get the log probabilities of each token in the completion.

    Args:
        completion: A chat completion whose choices may carry ``logprobs``.

    Returns:
        One list per choice, in choice order, holding a ``TokenLogProb`` for
        every token entry. A choice without log probabilities (``logprobs`` or
        ``logprobs.content`` is ``None``) contributes an empty list.
    """
    log_probs = []
    for choice in completion.choices:
        logprobs = choice.logprobs
        # Either level may be None when log probabilities were not returned.
        token_entries = (logprobs.content if logprobs is not None else None) or []
        log_probs.append(
            [
                TokenLogProb(token=entry.token, logprob=entry.logprob)
                for entry in token_entries
            ]
        )
    return log_probs


class AzureAIClient(ModelClient):
    __doc__ = r"""
    A client wrapper for interacting with Azure OpenAI's API.

    This class provides support for both embedding and chat completion API calls.
    Users can use this class to simplify their interactions with Azure OpenAI models
    through the `Embedder` and `Generator` components.

    **Initialization:**

    You can initialize the `AzureAIClient` with either an API key or Azure Active Directory (AAD) token
    authentication. It is recommended to set environment variables for sensitive data like API keys.

    Args:
        api_key (Optional[str]): Azure OpenAI API key. Default is None.
        api_version (Optional[str]): API version to use. Default is None.
        azure_endpoint (Optional[str]): Azure OpenAI endpoint URL. Default is None.
        credential (Optional[DefaultAzureCredential]): Azure AD credential for token-based authentication. Default is None.
        chat_completion_parser (Callable[[Completion], Any]): Function to parse chat completions. Default is `get_first_message_content`.
        input_type (Literal["text", "messages"]): Format for input, either "text" or "messages". Default is "text".

    **Setup Instructions:**

    - **Using API Key:**
      Set up the following environment variables:
      ```bash
      export AZURE_OPENAI_API_KEY="your_api_key"
      export AZURE_OPENAI_ENDPOINT="your_endpoint"
      export AZURE_OPENAI_VERSION="your_version"
      ```

    - **Using Azure AD Token:**
      Ensure you have configured Azure AD credentials. The `DefaultAzureCredential` will automatically use your configured credentials.

    **Example Usage:**

    .. code-block:: python

        from azure.identity import DefaultAzureCredential
        from your_module import AzureAIClient  # Adjust import based on your module name

        # Initialize with API key
        client = AzureAIClient(
            api_key="your_api_key",
            api_version="2023-05-15",
            azure_endpoint="https://your-endpoint.openai.azure.com/"
        )

        # Or initialize with Azure AD token
        client = AzureAIClient(
            api_version="2023-05-15",
            azure_endpoint="https://your-endpoint.openai.azure.com/",
            credential=DefaultAzureCredential()
        )

        # Example call to the chat completion API
        api_kwargs = {
            "model": "gpt-3.5-turbo",
            "messages": [{"role": "user", "content": "What is the meaning of life?"}],
            "stream": True
        }
        response = client.call(api_kwargs=api_kwargs, model_type=ModelType.LLM)

        for chunk in response:
            print(chunk)


    **Notes:**
    - Ensure that the API key or credentials are correctly set up and accessible to avoid authentication errors.
    - Use `chat_completion_parser` to define how to extract and handle the chat completion responses.
    - The `input_type` parameter determines how input is formatted for the API call.

    **References:**
    - [Azure OpenAI API Documentation](https://learn.microsoft.com/en-us/azure/ai-services/openai/overview)
    - [OpenAI API Documentation](https://platform.openai.com/docs/guides/text-generation)
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        api_version: Optional[str] = None,
        azure_endpoint: Optional[str] = None,
        credential: Optional[DefaultAzureCredential] = None,
        chat_completion_parser: Callable[[Completion], Any] = None,
        input_type: Literal["text", "messages"] = "text",
    ):
        r"""Set up the Azure OpenAI client with either API key or AAD token authentication.

        It is recommended to set the API key in the environment instead of
        passing it as an argument. The synchronous client is built right away;
        the asynchronous client is left unset until it is needed.

        Args:
            api_key: Azure OpenAI API key.
            api_version: Azure OpenAI API version.
            azure_endpoint: Azure OpenAI endpoint.
            credential: Azure AD credential for token-based authentication.
            chat_completion_parser: Function to parse chat completions;
                ``get_first_message_content`` is used when omitted.
            input_type: Input format, either "text" or "messages".

        """
        super().__init__()

        # Connection settings; init_sync_client() reads these, so they must be
        # stored before the synchronous client is created below.
        self._credential = credential
        self._azure_endpoint = azure_endpoint
        self._apiversion = api_version
        self._api_key = api_key

        # added api_type azure for azure Ai
        self.api_type = "azure"
        self._input_type = input_type
        if chat_completion_parser:
            self.chat_completion_parser = chat_completion_parser
        else:
            self.chat_completion_parser = get_first_message_content

        self.async_client = None  # only initialize if the async call is called
        self.sync_client = self.init_sync_client()

    def init_sync_client(self):
        """Build the synchronous ``AzureOpenAI`` client.

        The API key, endpoint and API version each come from the constructor
        argument when given, otherwise from the ``AZURE_OPENAI_API_KEY``,
        ``AZURE_OPENAI_ENDPOINT`` and ``AZURE_OPENAI_VERSION`` environment
        variables. An API key takes precedence over a credential.

        Returns:
            An ``AzureOpenAI`` client authenticated with the API key, or with
            a bearer-token provider built from the credential passed to the
            constructor.

        Raises:
            ValueError: the endpoint or API version is missing, or neither an
                API key nor a credential is available.
        """
        api_key = self._api_key or os.getenv("AZURE_OPENAI_API_KEY")
        azure_endpoint = self._azure_endpoint or os.getenv("AZURE_OPENAI_ENDPOINT")
        api_version = self._apiversion or os.getenv("AZURE_OPENAI_VERSION")
        if not azure_endpoint:
            raise ValueError("Environment variable AZURE_OPENAI_ENDPOINT must be set")
        if not api_version:
            raise ValueError("Environment variable AZURE_OPENAI_VERSION must be set")

        if api_key:
            return AzureOpenAI(
                api_key=api_key, azure_endpoint=azure_endpoint, api_version=api_version
            )
        if self._credential:
            # Use the credential the caller supplied; building a fresh
            # DefaultAzureCredential here would silently discard it.
            token_provider = get_bearer_token_provider(
                self._credential, "https://cognitiveservices.azure.com/.default"
            )
            return AzureOpenAI(
                azure_ad_token_provider=token_provider,
                azure_endpoint=azure_endpoint,
                api_version=api_version,
            )
        raise ValueError(
            "Environment variable AZURE_OPENAI_API_KEY must be set or credential must be provided"
        )

    def init_async_client(self):
        """Build the asynchronous ``AsyncAzureOpenAI`` client.

        The API key, endpoint and API version each come from the constructor
        argument when given, otherwise from the ``AZURE_OPENAI_API_KEY``,
        ``AZURE_OPENAI_ENDPOINT`` and ``AZURE_OPENAI_VERSION`` environment
        variables. An API key takes precedence over a credential.

        Returns:
            An ``AsyncAzureOpenAI`` client authenticated with the API key, or
            with a bearer-token provider built from the credential passed to
            the constructor.

        Raises:
            ValueError: the endpoint or API version is missing, or neither an
                API key nor a credential is available.
        """
        api_key = self._api_key or os.getenv("AZURE_OPENAI_API_KEY")
        azure_endpoint = self._azure_endpoint or os.getenv("AZURE_OPENAI_ENDPOINT")
        api_version = self._apiversion or os.getenv("AZURE_OPENAI_VERSION")
        if not azure_endpoint:
            raise ValueError("Environment variable AZURE_OPENAI_ENDPOINT must be set")
        if not api_version:
            raise ValueError("Environment variable AZURE_OPENAI_VERSION must be set")

        if api_key:
            return AsyncAzureOpenAI(
                api_key=api_key, azure_endpoint=azure_endpoint, api_version=api_version
            )
        if self._credential:
            # Use the credential the caller supplied; building a fresh
            # DefaultAzureCredential here would silently discard it.
            token_provider = get_bearer_token_provider(
                self._credential, "https://cognitiveservices.azure.com/.default"
            )
            return AsyncAzureOpenAI(
                azure_ad_token_provider=token_provider,
                azure_endpoint=azure_endpoint,
                api_version=api_version,
            )
        raise ValueError(
            "Environment variable AZURE_OPENAI_API_KEY must be set or credential must be provided"
        )

    # def _parse_chat_completion(self, completion: ChatCompletion) -> "GeneratorOutput":
    #     # TODO: raw output it is better to save the whole completion as a source of truth instead of just the message
    #     try:
    #         data = self.chat_completion_parser(completion)
    #         usage = self.track_completion_usage(completion)
    #         return GeneratorOutput(
    #             data=data, error=None, raw_response=str(data), usage=usage
    #         )
    #     except Exception as e:
    #         log.error(f"Error parsing the completion: {e}")
    #         return GeneratorOutput(data=None, error=str(e), raw_response=completion)

    def parse_chat_completion(
        self,
        completion: Union[ChatCompletion, Generator[ChatCompletionChunk, None, None]],
    ) -> "GeneratorOutput":
        """Parse the completion, and put it into the raw_response.

        Parsing and usage tracking are handled separately so that a failure to
        track usage (which ``track_completion_usage`` always raises for
        streaming completions) does not discard successfully parsed output.

        Args:
            completion: The chat completion, or the stream of chunks, returned
                by the API call.

        Returns:
            A ``GeneratorOutput`` whose ``raw_response`` holds the parser's
            result and whose ``usage`` is ``None`` when usage could not be
            determined. If the parser itself fails, ``error`` holds the
            message and ``raw_response`` holds the untouched ``completion``.
        """
        log.debug(f"completion: {completion}, parser: {self.chat_completion_parser}")
        try:
            data = self.chat_completion_parser(completion)
        except Exception as e:
            log.error(f"Error parsing the completion: {e}")
            return GeneratorOutput(data=None, error=str(e), raw_response=completion)

        try:
            usage = self.track_completion_usage(completion)
        except Exception as e:
            # Missing usage information is not a generation failure.
            log.debug(f"Completion usage is unavailable: {e}")
            usage = None
        return GeneratorOutput(data=None, error=None, raw_response=data, usage=usage)

    def track_completion_usage(
        self,
        completion: Union[ChatCompletion, Generator[ChatCompletionChunk, None, None]],
    ) -> CompletionUsage:
        """Copy the token counts of a non-streaming completion into a ``CompletionUsage``.

        Raises:
            NotImplementedError: ``completion`` is not a ``ChatCompletion``
                (i.e. a streaming response).
        """
        if not isinstance(completion, ChatCompletion):
            raise NotImplementedError(
                "streaming completion usage tracking is not implemented"
            )

        reported = completion.usage
        return CompletionUsage(
            completion_tokens=reported.completion_tokens,
            prompt_tokens=reported.prompt_tokens,
            total_tokens=reported.total_tokens,
        )

    def parse_embedding_response(
        self, response: CreateEmbeddingResponse
    ) -> EmbedderOutput:
        r"""Convert an embedding response into an ``EmbedderOutput``.

        Delegates to the module-level ``parse_embedding_response`` helper.
        Should be called in ``Embedder``. If the helper raises, the error is
        logged and an ``EmbedderOutput`` with empty data, the error message
        and the raw response is returned instead.
        """
        try:
            parsed = parse_embedding_response(response)
        except Exception as exc:
            log.error(f"Error parsing the embedding response: {exc}")
            parsed = EmbedderOutput(data=[], error=str(exc), raw_response=response)
        return parsed

    def convert_inputs_to_api_kwargs(
        self,
        input: Optional[Any] = None,
        model_kwargs: Dict = {},
        model_type: ModelType = ModelType.UNDEFINED,
    ) -> Dict:
        r"""
        Specify the API input type and output api_kwargs that will be used in _call and _acall methods.
        Convert the Component's standard input, and system_input(chat model) and model_kwargs into API-specific format

        Args:
            input: A string or sequence of strings for embedders; the prompt
                text for LLMs.
            model_kwargs: Extra API arguments. The dict is copied, never
                mutated, so the shared default is safe.
            model_type: ``ModelType.EMBEDDER`` or ``ModelType.LLM``.

        Returns:
            A new dict with ``model_kwargs`` plus ``input`` (embedder) or
            ``messages`` (LLM). With ``input_type == "messages"`` the prompt is
            split into a system and a user message when it contains the
            system/user prompt tags; otherwise the whole prompt is sent as a
            single system message.

        Raises:
            TypeError: an embedder input is not a sequence.
            ValueError: ``model_type`` is not supported.
        """

        final_model_kwargs = model_kwargs.copy()
        if model_type == ModelType.EMBEDDER:
            if isinstance(input, str):
                input = [input]
            # convert input to input
            if not isinstance(input, Sequence):
                raise TypeError("input must be a sequence of text")
            final_model_kwargs["input"] = input
        elif model_type == ModelType.LLM:
            # convert input to messages
            messages: List[Dict[str, str]] = []

            if self._input_type == "messages":
                system_start_tag = "<START_OF_SYSTEM_PROMPT>"
                system_end_tag = "<END_OF_SYSTEM_PROMPT>"
                user_start_tag = "<START_OF_USER_PROMPT>"
                user_end_tag = "<END_OF_USER_PROMPT>"
                # Allow whitespace (e.g. a newline) between the two tagged sections.
                pattern = (
                    rf"{system_start_tag}(.*?){system_end_tag}\s*"
                    rf"{user_start_tag}(.*?){user_end_tag}"
                )
                # re.DOTALL lets "." match newlines; without it any multi-line
                # prompt fails to match and is never split into two messages.
                regex = re.compile(pattern, re.DOTALL)
                match = regex.search(input)

                if match:
                    system_prompt = match.group(1)
                    input_str = match.group(2)
                    # Both parts must be non-empty to use the two-message form.
                    if system_prompt and input_str:
                        messages.append({"role": "system", "content": system_prompt})
                        messages.append({"role": "user", "content": input_str})
                else:
                    log.debug("No match found.")
            if len(messages) == 0:
                messages.append({"role": "system", "content": input})
            final_model_kwargs["messages"] = messages
        else:
            raise ValueError(f"model_type {model_type} is not supported")
        return final_model_kwargs

    @backoff.on_exception(
        backoff.expo,
        (
            APITimeoutError,
            InternalServerError,
            RateLimitError,
            UnprocessableEntityError,
            BadRequestError,
        ),
        max_time=5,
    )
    def call(self, api_kwargs: Dict = {}, model_type: ModelType = ModelType.UNDEFINED):
        """
        kwargs is the combined input and model_kwargs.  Support streaming call.

        For a streaming LLM call the instance's ``chat_completion_parser`` is
        switched to ``handle_streaming_response``. The parser that was active
        before is remembered and put back on the next non-streaming LLM call,
        so one streaming request does not leave the client unable to parse
        regular completions.

        Args:
            api_kwargs: Keyword arguments forwarded unchanged to the API.
            model_type: ``ModelType.EMBEDDER`` or ``ModelType.LLM``.

        Returns:
            The raw response of the embeddings or chat completions endpoint.

        Raises:
            ValueError: ``model_type`` is not supported.
        """
        log.info(f"api_kwargs: {api_kwargs}")
        if model_type == ModelType.EMBEDDER:
            return self.sync_client.embeddings.create(**api_kwargs)
        if model_type == ModelType.LLM:
            if api_kwargs.get("stream", False):
                log.debug("streaming call")
                if self.chat_completion_parser is not handle_streaming_response:
                    # Remember the regular parser so it can be restored later.
                    self._non_streaming_parser = self.chat_completion_parser
                self.chat_completion_parser = handle_streaming_response
            else:
                # getattr: the attribute only exists after a streaming call.
                previous_parser = getattr(self, "_non_streaming_parser", None)
                if (
                    previous_parser is not None
                    and self.chat_completion_parser is handle_streaming_response
                ):
                    self.chat_completion_parser = previous_parser
            return self.sync_client.chat.completions.create(**api_kwargs)
        raise ValueError(f"model_type {model_type} is not supported")

    @backoff.on_exception(
        backoff.expo,
        (
            APITimeoutError,
            InternalServerError,
            RateLimitError,
            UnprocessableEntityError,
            BadRequestError,
        ),
        max_time=5,
    )
    async def acall(
        self, api_kwargs: Dict = {}, model_type: ModelType = ModelType.UNDEFINED
    ):
        """
        Asynchronous counterpart of ``call``; kwargs is the combined input and model_kwargs.

        The async client is created on first use. Raises ``ValueError`` for a
        model type other than embedder or LLM.
        """
        # Create the async client lazily, on the first asynchronous call.
        if self.async_client is None:
            self.async_client = self.init_async_client()

        if model_type == ModelType.LLM:
            return await self.async_client.chat.completions.create(**api_kwargs)
        if model_type == ModelType.EMBEDDER:
            return await self.async_client.embeddings.create(**api_kwargs)
        raise ValueError(f"model_type {model_type} is not supported")

    @classmethod
    def from_dict(cls: type[T], data: Dict[str, Any]) -> T:
        r"""Rebuild a client from ``data`` and recreate its sync and async API clients.

        The clients are left out of ``to_dict`` output, so they are built
        again here from the restored settings."""
        restored = super().from_dict(data)
        restored.sync_client = restored.init_sync_client()
        restored.async_client = restored.init_async_client()
        return restored

    def to_dict(self) -> Dict[str, Any]:
        r"""Serialize the component to a dictionary, leaving out the API client objects."""
        # TODO: not exclude but save yes or no for recreating the clients
        # The client objects are unserializable, so they are excluded.
        unserializable = ["sync_client", "async_client"]
        return super().to_dict(exclude=unserializable)


# if __name__ == "__main__":
#     from adalflow.core import Generator
#     from adalflow.utils import setup_env, get_logger

#     log = get_logger(level="DEBUG")

#     setup_env()
#     prompt_kwargs = {"input_str": "What is the meaning of life?"}

#     gen = Generator(
#         model_client=OpenAIClient(),
#         model_kwargs={"model": "gpt-3.5-turbo", "stream": True},
#     )
#     gen_response = gen(prompt_kwargs)
#     print(f"gen_response: {gen_response}")

#     for genout in gen_response.data:
#         print(f"genout: {genout}")

================================================
FILE: api/bedrock_client.py
================================================
"""AWS Bedrock ModelClient integration."""

import os
import json
import logging
import boto3
import botocore
import backoff
from typing import Dict, Any, Optional, List, Generator, Union, AsyncGenerator, Sequence

from adalflow.core.model_client import ModelClient
from adalflow.core.types import ModelType, GeneratorOutput, EmbedderOutput

# Configure logging
from api.logging_config import setup_logging

setup_logging()
log = logging.getLogger(__name__)

class BedrockClient(ModelClient):
    __doc__ = r"""A component wrapper for the AWS Bedrock API client.

    AWS Bedrock provides a unified API that gives access to various foundation models
    including Amazon's own models and third-party models like Anthropic Claude.

    Example:
        ```python
        from api.bedrock_client import BedrockClient

        client = BedrockClient()
        generator = adal.Generator(
            model_client=client,
            model_kwargs={"model": "anthropic.claude-3-sonnet-20240229-v1:0"}
        )
        ```
    """

    def __init__(
        self,
        aws_access_key_id: Optional[str] = None,
        aws_secret_access_key: Optional[str] = None,
        aws_session_token: Optional[str] = None,
        aws_region: Optional[str] = None,
        aws_role_arn: Optional[str] = None,
        *args,
        **kwargs
    ) -> None:
        """Store the AWS settings and build the synchronous Bedrock client.

        Every explicit argument takes precedence; a missing (falsy) argument
        falls back to the matching constant in ``api.config``, which is read
        from the environment variable of the same name.

        Args:
            aws_access_key_id: AWS access key ID (fallback: AWS_ACCESS_KEY_ID).
            aws_secret_access_key: AWS secret access key (fallback: AWS_SECRET_ACCESS_KEY).
            aws_session_token: AWS session token (fallback: AWS_SESSION_TOKEN).
            aws_region: AWS region (fallback: AWS_REGION, then "us-east-1").
            aws_role_arn: IAM role ARN to assume (fallback: AWS_ROLE_ARN).
            *args: Forwarded unchanged to the parent constructor.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        super().__init__(*args, **kwargs)

        # Imported inside the constructor: api.config itself imports
        # BedrockClient from this module.
        from api.config import (
            AWS_ACCESS_KEY_ID as default_access_key_id,
            AWS_SECRET_ACCESS_KEY as default_secret_access_key,
            AWS_SESSION_TOKEN as default_session_token,
            AWS_REGION as default_region,
            AWS_ROLE_ARN as default_role_arn,
        )

        # The region is the only setting with a hard-coded last resort.
        fallback_region = default_region or "us-east-1"

        self.aws_access_key_id = aws_access_key_id or default_access_key_id
        self.aws_secret_access_key = aws_secret_access_key or default_secret_access_key
        self.aws_session_token = aws_session_token or default_session_token
        self.aws_region = aws_region or fallback_region
        self.aws_role_arn = aws_role_arn or default_role_arn

        # The sync client is created eagerly; the async one stays unset until
        # something asks for it.
        self.sync_client = self.init_sync_client()
        self.async_client = None

    @classmethod
    def from_dict(cls, data: Dict[str, Any]):
        """Build a client from a dictionary of constructor keyword arguments.

        Args:
            data: Mapping whose keys are constructor parameter names, such as
                the dictionary returned by ``to_dict``.

        Returns:
            A new instance constructed with ``data`` unpacked as keyword arguments.
        """
        instance = cls(**data)
        return instance

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary."""
        return {
            "aws_access_key_id": self.aws_access_key_id,
            "aws_secret_access_key": self.aws_secret_access_key,
            "aws_session_token": self.aws_session_token,
            "aws_region": self.aws_region,
            "aws_role_arn": self.aws_role_arn,
        }

    def __getstate__(self):
        """
        Customize serialization to exclude non-picklable client objects.
        This method is called by pickle when saving the object's state.
        """
        state = self.__dict__.copy()
        # Remove the unpicklable client instances
        if 'sync_client' in state:
            del state['sync_client']
        if 'async_client' in state:
            del state['async_client']
        return state

    def __setstate__(self, state):
        """
        Customize deserialization to re-create the client objects.
        This method is called by pickle when loading the object's state.
        """
        self.__dict__.update(state)
        # Re-initialize the clients after unpickling
        self.sync_client = self.init_sync_client()
        self.async_client = None  # It will be lazily initialized when acall is used

    def init_sync_client(self):
        """Create the boto3 ``bedrock-runtime`` client from the stored settings.

        A session is opened with the stored credentials and region. When a role
        ARN is configured, that role is assumed through STS and a second session
        is opened with the temporary credentials it returns.

        Returns:
            The ``bedrock-runtime`` client, or ``None`` when any step raised
            (the error is logged instead of propagated).
        """
        try:
            region = self.aws_region

            # Session backed by the credentials stored on this instance.
            active_session = boto3.Session(
                aws_access_key_id=self.aws_access_key_id,
                aws_secret_access_key=self.aws_secret_access_key,
                aws_session_token=self.aws_session_token,
                region_name=region
            )

            if self.aws_role_arn:
                # Swap the session for one that carries the assumed role's
                # temporary credentials.
                role_credentials = active_session.client('sts').assume_role(
                    RoleArn=self.aws_role_arn,
                    RoleSessionName="DeepWikiBedrockSession"
                )['Credentials']
                active_session = boto3.Session(
                    aws_access_key_id=role_credentials['AccessKeyId'],
                    aws_secret_access_key=role_credentials['SecretAccessKey'],
                    aws_session_token=role_credentials['SessionToken'],
                    region_name=region
                )

            return active_session.client(
                service_name='bedrock-runtime',
                region_name=region
            )
        except Exception as e:
            log.error(f"Error initializing AWS Bedrock client: {str(e)}")
            # None signals the failure; call() checks for it before use.
            return None

    def init_async_client(self):
        """Initialize the asynchronous AWS Bedrock client.
        
        Note: boto3 doesn't have native async support, so we'll use the sync client
        in async methods and handle async behavior at a higher level.
        """
        # For now, just return the sync client
        return self.sync_client

    def _get_model_provider(self, model_id: str) -> str:
        """Extract the provider from the model ID.
        
        Args:
            model_id: The model inference ID, e.g., "anthropic.claude-3-sonnet-20240229-v1:0", "global.anthropic.claude-sonnet-4-5-20250929-v1:0", or "global.cohere.embed-v4:0"
            
        Returns:
            The provider name, e.g., "anthropic"
        """
        seg = model_id.split(".")
        if len(seg) >= 3:
            # regional format
            return seg[1]
        elif len(seg) == 2:
            # non-regional format
            return seg[0]
        else:
            # Default to Amazon if format is unexpected
            return "amazon"

    def _format_prompt_for_provider(self, provider: str, prompt: str, messages=None) -> Dict[str, Any]:
        """Format the prompt according to the provider's requirements.
        
        Args:
            provider: The provider name, e.g., "anthropic"
            prompt: The prompt text
            messages: Optional list of messages for chat models
            
        Returns:
            A dictionary with the formatted prompt
        """
        if provider == "anthropic":
            # Format for Claude models
            if messages:
                # Format as a conversation
                formatted_messages = []
                for msg in messages:
                    role = "user" if msg.get("role") == "user" else "assistant"
                    formatted_messages.append({
                        "role": role,
                        "content": [{"type": "text", "text": msg.get("content", "")}]
                    })
                return {
                    "anthropic_version": "bedrock-2023-05-31",
                    "messages": formatted_messages,
                    "max_tokens": 4096
                }
            else:
                # Format as a single prompt
                return {
                    "anthropic_version": "bedrock-2023-05-31",
                    "messages": [
                        {"role": "user", "content": [{"type": "text", "text": prompt}]}
                    ],
                    "max_tokens": 4096
                }
        elif provider == "amazon":
            # Format for Amazon Titan models
            return {
                "inputText": prompt,
                "textGenerationConfig": {
                    "maxTokenCount": 4096,
                    "stopSequences": [],
                    "temperature": 0.7,
                    "topP": 0.8
                }
            }
        elif provider == "cohere":
            # Format for Cohere models
            return {
                "prompt": prompt,
                "max_tokens": 4096,
                "temperature": 0.7,
                "p": 0.8
            }
        elif provider == "ai21":
            # Format for AI21 models
            return {
                "prompt": prompt,
                "maxTokens": 4096,
                "temperature": 0.7,
                "topP": 0.8
            }
        else:
            # Default format
            return {"prompt": prompt}

    def _extract_response_text(self, provider: str, response: Dict[str, Any]) -> str:
        """Extract the generated text from the response.
        
        Args:
            provider: The provider name, e.g., "anthropic"
            response: The response from the Bedrock API
            
        Returns:
            The generated text
        """
        if provider == "anthropic":
            return response.get("content", [{}])[0].get("text", "")
        elif provider == "amazon":
            return response.get("results", [{}])[0].get("outputText", "")
        elif provider == "cohere":
            return response.get("generations", [{}])[0].get("text", "")
        elif provider == "ai21":
            return response.get("completions", [{}])[0].get("data", {}).get("text", "")
        else:
            # Try to extract text from the response
            if isinstance(response, dict):
                for key in ["text", "content", "output", "completion"]:
                    if key in response:
                        return response[key]
            return str(response)

    def parse_embedding_response(self, response: Any) -> EmbedderOutput:
        """Turn a Bedrock embedding result into an ``EmbedderOutput``.

        Two dictionary shapes are understood: ``{"embeddings": [...]}`` (one
        vector per input, indexed in order) and ``{"embedding": [...]}`` (a
        single vector, index 0). Anything else, or any error while building the
        output, is logged and returned as an ``EmbedderOutput`` with empty data
        and the error text.
        """
        from adalflow.core.types import Embedding

        try:
            is_mapping = isinstance(response, dict)

            if is_mapping and "embeddings" in response:
                vectors = response.get("embeddings") or []
                parsed: List[Embedding] = [
                    Embedding(embedding=vector, index=position)
                    for position, vector in enumerate(vectors)
                ]
            elif is_mapping and "embedding" in response:
                single_vector = response.get("embedding") or []
                parsed = [Embedding(embedding=single_vector, index=0)]
            else:
                raise ValueError(f"Unexpected embedding response type: {type(response)}")

            return EmbedderOutput(data=parsed, error=None, raw_response=response)
        except Exception as e:
            log.error(f"Error parsing Bedrock embedding response: {e}")
            return EmbedderOutput(data=[], error=str(e), raw_response=response)

    @backoff.on_exception(
        backoff.expo,
        (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError),
        max_time=5,
    )
    def call(self, api_kwargs: Dict = None, model_type: ModelType = None) -> Any:
        """Make a synchronous call to the AWS Bedrock API.

        Args:
            api_kwargs: Request settings as produced by
                ``convert_inputs_to_api_kwargs`` ("model", "input", and
                optionally "messages", "temperature", "top_p", "model_kwargs").
            model_type: ``ModelType.LLM`` for text generation or
                ``ModelType.EMBEDDER`` for embeddings.

        Returns:
            * LLM: the generated text, or an "Error: ..." string when the
              request failed.
            * EMBEDDER: ``{"embeddings": [...], "raw_responses": [...]}``.
            * An explanatory string when the client could not be initialized.

        Raises:
            ValueError: unsupported ``model_type``, or an embedding response
                without embeddings.
            NotImplementedError: embedding provider other than amazon/cohere.

        NOTE(review): the LLM path catches every exception and returns a
        string, so the backoff decorator above only ever retries the
        embedding path.
        """
        api_kwargs = api_kwargs or {}

        # init_sync_client returns None on failure; report that as text.
        if not self.sync_client:
            error_msg = "AWS Bedrock client not initialized. Check your AWS credentials and region."
            log.error(error_msg)
            return error_msg

        if model_type == ModelType.LLM:
            return self._call_llm(api_kwargs)
        if model_type == ModelType.EMBEDDER:
            return self._call_embedder(api_kwargs)
        raise ValueError(f"Model type {model_type} is not supported by AWS Bedrock client")

    def _invoke_json(self, model_id: str, body: str) -> Any:
        """Send an already JSON-encoded body to ``invoke_model`` and decode the JSON reply."""
        response = self.sync_client.invoke_model(
            modelId=model_id,
            body=body,
        )
        return json.loads(response["body"].read())

    def _call_llm(self, api_kwargs: Dict) -> str:
        """Run one text-generation request; failures come back as an "Error: ..." string."""
        model_id = api_kwargs.get("model", "anthropic.claude-3-sonnet-20240229-v1:0")
        provider = self._get_model_provider(model_id)

        request_body = self._format_prompt_for_provider(
            provider, api_kwargs.get("input", ""), api_kwargs.get("messages")
        )

        # Override the default sampling settings where the provider is known.
        if "temperature" in api_kwargs:
            if provider == "amazon":
                request_body["textGenerationConfig"]["temperature"] = api_kwargs["temperature"]
            elif provider in ("anthropic", "cohere", "ai21"):
                request_body["temperature"] = api_kwargs["temperature"]

        if "top_p" in api_kwargs:
            if provider == "amazon":
                request_body["textGenerationConfig"]["topP"] = api_kwargs["top_p"]
            else:
                # Each provider spells the nucleus-sampling key differently.
                top_p_key = {"anthropic": "top_p", "cohere": "p", "ai21": "topP"}.get(provider)
                if top_p_key is not None:
                    request_body[top_p_key] = api_kwargs["top_p"]

        # Encoded outside the try block so a non-serialisable body still raises.
        body = json.dumps(request_body)

        try:
            response_body = self._invoke_json(model_id, body)
            return self._extract_response_text(provider, response_body)
        except Exception as e:
            log.error(f"Error calling AWS Bedrock API: {str(e)}")
            return f"Error: {str(e)}"

    def _call_embedder(self, api_kwargs: Dict) -> Dict[str, Any]:
        """Embed the input texts with an Amazon Titan or Cohere embedding model."""
        model_id = api_kwargs.get("model", "amazon.titan-embed-text-v2:0")
        provider = self._get_model_provider(model_id)

        texts = api_kwargs.get("input")
        if isinstance(texts, str):
            # A bare string is one text, not a sequence of characters.
            texts = [texts]
        texts = list(texts or [])

        model_kwargs = api_kwargs.get("model_kwargs") or {}

        embeddings: List[List[float]] = []
        raw_responses: List[Dict[str, Any]] = []

        if provider == "amazon":
            # The optional settings are the same for every text; build them once.
            options: Dict[str, Any] = {}
            dimensions = model_kwargs.get("dimensions")
            if dimensions is not None:
                options["dimensions"] = int(dimensions)
            normalize = model_kwargs.get("normalize")
            if normalize is not None:
                options["normalize"] = bool(normalize)

            # Amazon Titan Embed Text does not support batch; send one at a time.
            for text in texts:
                request_body: Dict[str, Any] = {"inputText": text, **options}
                response_body = self._invoke_json(model_id, json.dumps(request_body))
                raw_responses.append(response_body)

                emb = response_body.get("embedding")
                if emb is None:
                    raise ValueError(f"Embedding not found in response: {response_body}")
                embeddings.append(emb)

        elif provider == "cohere":
            # Cohere accepts batches, but Bedrock allows at most 96 texts per
            # request, so larger inputs are sent in consecutive slices and the
            # results concatenated in input order.
            max_texts_per_call = 96
            input_type = model_kwargs.get("input_type") or "search_document"

            for start in range(0, len(texts), max_texts_per_call):
                request_body = {
                    "texts": texts[start:start + max_texts_per_call],
                    "input_type": input_type,
                }
                response_body = self._invoke_json(model_id, json.dumps(request_body))
                raw_responses.append(response_body)

                batch_embeddings = response_body.get("embeddings")
                if isinstance(batch_embeddings, dict) and "float" in batch_embeddings:
                    # Typed response: the float vectors sit under "float".
                    batch_embeddings = batch_embeddings["float"]
                if not isinstance(batch_embeddings, list):
                    raise ValueError(f"Embeddings not found in response: {response_body}")
                embeddings.extend(batch_embeddings)
        else:
            raise NotImplementedError(f"Embedding provider '{provider}' is not supported by the Bedrock client.")

        return {"embeddings": embeddings, "raw_responses": raw_responses}

    async def acall(self, api_kwargs: Dict = None, model_type: ModelType = None) -> Any:
        """Make an asynchronous call to the AWS Bedrock API."""
        # For now, just call the sync method
        # In a real implementation, you would use an async library or run the sync method in a thread pool
        return self.call(api_kwargs, model_type)

    def convert_inputs_to_api_kwargs(
        self, input: Any = None, model_kwargs: Dict = None, model_type: ModelType = None
    ) -> Dict:
        """Translate component inputs into the ``api_kwargs`` consumed by ``call``.

        Args:
            input: The prompt (LLM), or a string / sequence of strings (EMBEDDER).
            model_kwargs: Model settings; "model", "temperature" and "top_p" are
                read for LLM calls, and the whole dict is forwarded for embeddings.
            model_type: ``ModelType.LLM`` or ``ModelType.EMBEDDER``.

        Returns:
            The keyword dictionary for ``call``.

        Raises:
            TypeError: embedding input that is neither a string nor a sequence.
            ValueError: any other ``model_type``.
        """
        model_kwargs = model_kwargs or {}

        if model_type == ModelType.LLM:
            api_kwargs = {
                "model": model_kwargs.get("model", "anthropic.claude-3-sonnet-20240229-v1:0"),
                "input": input,
            }
            # Sampling options are copied only when the caller supplied them.
            for option in ("temperature", "top_p"):
                if option in model_kwargs:
                    api_kwargs[option] = model_kwargs[option]
            return api_kwargs

        if model_type == ModelType.EMBEDDER:
            # Normalise the input to a list of texts.
            if isinstance(input, str):
                texts = [input]
            elif isinstance(input, Sequence):
                texts = list(input)
            else:
                raise TypeError("input must be a string or sequence of strings")

            return {
                "model": model_kwargs.get("model", "amazon.titan-embed-text-v2:0"),
                "input": texts,
                "model_kwargs": model_kwargs,
            }

        raise ValueError(f"Model type {model_type} is not supported by AWS Bedrock client")


================================================
FILE: api/config/embedder.json
================================================
{
  "embedder": {
    "client_class": "OpenAIClient",
    "batch_size": 500,
    "model_kwargs": {
      "model": "text-embedding-3-small",
      "dimensions": 256,
      "encoding_format": "float"
    }
  },
  "embedder_ollama": {
    "client_class": "OllamaClient",
    "model_kwargs": {
      "model": "nomic-embed-text"
    }
  },
  "embedder_google": {
    "client_class": "GoogleEmbedderClient",
    "batch_size": 100,
    "model_kwargs": {
      "model": "gemini-embedding-001",
      "task_type": "SEMANTIC_SIMILARITY"
    }
  },
  "embedder_bedrock": {
    "client_class": "BedrockClient",
    "batch_size": 100,
    "model_kwargs": {
      "model": "amazon.titan-embed-text-v2:0",
      "dimensions": 256
    }
  },
  "retriever": {
    "top_k": 20
  },
  "text_splitter": {
    "split_by": "word",
    "chunk_size": 350,
    "chunk_overlap": 100
  }
}


================================================
FILE: api/config/embedder.json.bak
================================================
{
  "embedder": {
    "client_class": "OpenAIClient",
    "batch_size": 500,
    "model_kwargs": {
      "model": "text-embedding-3-small",
      "dimensions": 256,
      "encoding_format": "float"
    }
  },
  "retriever": {
    "top_k": 20
  },
  "text_splitter": {
    "split_by": "word",
    "chunk_size": 350,
    "chunk_overlap": 100
  }
}


================================================
FILE: api/config/embedder.ollama.json.bak
================================================
{
  "embedder_ollama": {
    "client_class": "OllamaClient",
    "model_kwargs": {
      "model": "nomic-embed-text"
    }
  },
  "embedder": {
    "client_class": "OllamaClient",
    "model_kwargs": {
      "model": "nomic-embed-text"
    }
  },
  "retriever": {
    "top_k": 20
  },
  "text_splitter": {
    "split_by": "word",
    "chunk_size": 350,
    "chunk_overlap": 100
  }
}


================================================
FILE: api/config/embedder.openai_compatible.json.bak
================================================
{
  "embedder": {
    "client_class": "OpenAIClient",
    "initialize_kwargs": {
      "api_key": "${OPENAI_API_KEY}",
      "base_url": "${OPENAI_BASE_URL}"
    },
    "batch_size": 10,
    "model_kwargs": {
      "model": "text-embedding-v3",
      "dimensions": 256,
      "encoding_format": "float"
    }
  },
  "embedder_ollama": {
    "client_class": "OllamaClient",
    "model_kwargs": {
      "model": "nomic-embed-text"
    }
  },
  "retriever": {
    "top_k": 20
  },
  "text_splitter": {
    "split_by": "word",
    "chunk_size": 350,
    "chunk_overlap": 100
  }
}


================================================
FILE: api/config/generator.json
================================================
{
  "default_provider": "google",
  "providers": {
    "dashscope": {
      "default_model": "qwen-plus",
      "supportsCustomModel": true,
      "models": {
        "qwen-plus": {
          "temperature": 0.7,
          "top_p": 0.8
        },
        "qwen-turbo": {
          "temperature": 0.7,
          "top_p": 0.8
        },
        "deepseek-r1": {
          "temperature": 0.7,
          "top_p": 0.8
        }
      }
    },
    "google": {
      "default_model": "gemini-2.5-flash",
      "supportsCustomModel": true,
      "models": {
        "gemini-2.5-flash": {
          "temperature": 1.0,
          "top_p": 0.8,
          "top_k": 20
        },
        "gemini-2.5-flash-lite": {
          "temperature": 1.0,
          "top_p": 0.8,
          "top_k": 20
        },
        "gemini-2.5-pro": {
          "temperature": 1.0,
          "top_p": 0.8,
          "top_k": 20
        }
      }
    },
    "openai": {
      "default_model": "gpt-5-nano",
      "supportsCustomModel": true,
      "models": {
        "gpt-5": {
          "temperature": 1.0
        },
        "gpt-5-nano": {
          "temperature": 1.0
        },
        "gpt-5-mini": {
          "temperature": 1.0
        },
        "gpt-4o": {
          "temperature": 0.7,
          "top_p": 0.8
        },
        "gpt-4.1": {
          "temperature": 0.7,
          "top_p": 0.8
        },
        "o1": {
          "temperature": 0.7,
          "top_p": 0.8
        },
        "o3": {
          "temperature": 1.0
        },
        "o4-mini": {
          "temperature": 1.0
        }
      }
    },
    "openrouter": {
      "default_model": "openai/gpt-5-nano",
      "supportsCustomModel": true,
      "models": {
        "openai/gpt-5-nano": {
          "temperature": 0.7,
          "top_p": 0.8
        },
        "openai/gpt-4o": {
          "temperature": 0.7,
          "top_p": 0.8
        },
        "deepseek/deepseek-r1": {
          "temperature": 0.7,
          "top_p": 0.8
        },
        "openai/gpt-4.1": {
          "temperature": 0.7,
          "top_p": 0.8
        },
        "openai/o1": {
          "temperature": 0.7,
          "top_p": 0.8
        },
        "openai/o3": {
          "temperature": 1.0
        },
        "openai/o4-mini": {
          "temperature": 1.0
        },
        "anthropic/claude-3.7-sonnet": {
          "temperature": 0.7,
          "top_p": 0.8
        },
        "anthropic/claude-3.5-sonnet": {
          "temperature": 0.7,
          "top_p": 0.8
        }
      }
    },
    "ollama": {
      "default_model": "qwen3:1.7b",
      "supportsCustomModel": true,
      "models": {
        "qwen3:1.7b": {
          "options": {
            "temperature": 0.7,
            "top_p": 0.8,
            "num_ctx": 32000
          }
        },
        "llama3:8b": {
          "options": {
            "temperature": 0.7,
            "top_p": 0.8,
            "num_ctx": 8000
          }
        },
        "qwen3:8b": {
          "options": {
            "temperature": 0.7,
            "top_p": 0.8,
            "num_ctx": 32000
          }
        }
      }
    },
    "bedrock": {
      "client_class": "BedrockClient",
      "default_model": "anthropic.claude-3-sonnet-20240229-v1:0",
      "supportsCustomModel": true,
      "models": {
        "anthropic.claude-3-sonnet-20240229-v1:0": {
          "temperature": 0.7,
          "top_p": 0.8
        },
        "anthropic.claude-3-haiku-20240307-v1:0": {
          "temperature": 0.7,
          "top_p": 0.8
        },
        "anthropic.claude-3-opus-20240229-v1:0": {
          "temperature": 0.7,
          "top_p": 0.8
        },
        "amazon.titan-text-express-v1": {
          "temperature": 0.7,
          "top_p": 0.8
        },
        "cohere.command-r-v1:0": {
          "temperature": 0.7,
          "top_p": 0.8
        },
        "ai21.j2-ultra-v1": {
          "temperature": 0.7,
          "top_p": 0.8
        }
      }
    },
    "azure": {
      "client_class": "AzureAIClient",
      "default_model": "gpt-4o",
      "supportsCustomModel": true,
      "models": {
        "gpt-4o": {
          "temperature": 0.7,
          "top_p": 0.8
        },
        "gpt-4": {
          "temperature": 0.7,
          "top_p": 0.8
        },
        "gpt-35-turbo": {
          "temperature": 0.7,
          "top_p": 0.8
        },
        "gpt-4-turbo": {
          "temperature": 0.7,
          "top_p": 0.8
        }
      }
    }
  }
}


================================================
FILE: api/config/lang.json
================================================
{
  "supported_languages": {
    "en": "English",
    "ja": "Japanese (日本語)",
    "zh": "Mandarin Chinese (中文)",
    "zh-tw": "Traditional Chinese (繁體中文)",
    "es": "Spanish (Español)",
    "kr": "Korean (한국어)",
    "vi": "Vietnamese (Tiếng Việt)",
    "pt-br": "Brazilian Portuguese (Português Brasileiro)",
    "fr": "Français (French)",
    "ru": "Русский (Russian)"
  },
  "default": "en"
}


================================================
FILE: api/config/repo.json
================================================
{
  "file_filters": {
    "excluded_dirs": [
      "./.venv/", 
      "./venv/", 
      "./env/", 
      "./virtualenv/",
      "./node_modules/", 
      "./bower_components/", 
      "./jspm_packages/",
      "./.git/", 
      "./.svn/", 
      "./.hg/", 
      "./.bzr/"
    ],
    "excluded_files": [
      "yarn.lock", 
      "pnpm-lock.yaml", 
      "npm-shrinkwrap.json", 
      "poetry.lock",
      "Pipfile.lock", 
      "requirements.txt.lock", 
      "Cargo.lock", 
      "composer.lock",
      ".lock", 
      ".DS_Store", 
      "Thumbs.db", 
      "desktop.ini", 
      "*.lnk", 
      ".env", 
      ".env.*", 
      "*.env", 
      "*.cfg", 
      "*.ini", 
      ".flaskenv", 
      ".gitignore", 
      ".gitattributes", 
      ".gitmodules", 
      ".github", 
      ".gitlab-ci.yml", 
      ".prettierrc", 
      ".eslintrc", 
      ".eslintignore", 
      ".stylelintrc", 
      ".editorconfig", 
      ".jshintrc", 
      ".pylintrc", 
      ".flake8", 
      "mypy.ini", 
      "pyproject.toml", 
      "tsconfig.json", 
      "webpack.config.js", 
      "babel.config.js", 
      "rollup.config.js", 
      "jest.config.js", 
      "karma.conf.js", 
      "vite.config.js", 
      "next.config.js", 
      "*.min.js", 
      "*.min.css", 
      "*.bundle.js", 
      "*.bundle.css", 
      "*.map", 
      "*.gz", 
      "*.zip", 
      "*.tar", 
      "*.tgz", 
      "*.rar", 
      "*.7z", 
      "*.iso", 
      "*.dmg", 
      "*.img", 
      "*.msix", 
      "*.appx", 
      "*.appxbundle", 
      "*.xap", 
      "*.ipa", 
      "*.deb", 
      "*.rpm", 
      "*.msi", 
      "*.exe", 
      "*.dll", 
      "*.so", 
      "*.dylib", 
      "*.o", 
      "*.obj", 
      "*.jar", 
      "*.war", 
      "*.ear", 
      "*.jsm", 
      "*.class", 
      "*.pyc", 
      "*.pyd", 
      "*.pyo", 
      "__pycache__", 
      "*.a", 
      "*.lib", 
      "*.lo", 
      "*.la", 
      "*.slo", 
      "*.dSYM",
      "*.egg", 
      "*.egg-info", 
      "*.dist-info", 
      "*.eggs", 
      "node_modules",
      "bower_components", 
      "jspm_packages", 
      "lib-cov", 
      "coverage", 
      "htmlcov", 
      ".nyc_output", 
      ".tox", 
      "dist", 
      "build", 
      "bld", 
      "out", 
      "bin", 
      "target",
      "packages/*/dist", 
      "packages/*/build", 
      ".output"
    ]
  },
  "repository": {
    "max_size_mb": 50000
  }
}


================================================
FILE: api/config.py
================================================
import os
import json
import logging
import re
from pathlib import Path
from typing import List, Union, Dict, Any

logger = logging.getLogger(__name__)

from api.openai_client import OpenAIClient
from api.openrouter_client import OpenRouterClient
from api.bedrock_client import BedrockClient
from api.google_embedder_client import GoogleEmbedderClient
from api.azureai_client import AzureAIClient
from api.dashscope_client import DashscopeClient
from adalflow import GoogleGenAIClient, OllamaClient

# Provider credentials and AWS settings, read from the process environment
# (each is None when the variable is not set).
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
OPENROUTER_API_KEY = os.getenv('OPENROUTER_API_KEY')
AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID')
AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY')
AWS_SESSION_TOKEN = os.getenv('AWS_SESSION_TOKEN')
AWS_REGION = os.getenv('AWS_REGION')
AWS_ROLE_ARN = os.getenv('AWS_ROLE_ARN')

# Write every non-empty value back into os.environ (in case they're needed
# elsewhere in the code).
for _env_name, _env_value in (
    ("OPENAI_API_KEY", OPENAI_API_KEY),
    ("GOOGLE_API_KEY", GOOGLE_API_KEY),
    ("OPENROUTER_API_KEY", OPENROUTER_API_KEY),
    ("AWS_ACCESS_KEY_ID", AWS_ACCESS_KEY_ID),
    ("AWS_SECRET_ACCESS_KEY", AWS_SECRET_ACCESS_KEY),
    ("AWS_SESSION_TOKEN", AWS_SESSION_TOKEN),
    ("AWS_REGION", AWS_REGION),
    ("AWS_ROLE_ARN", AWS_ROLE_ARN),
):
    if _env_value:
        os.environ[_env_name] = _env_value
# Keep the loop variables out of the module namespace.
del _env_name, _env_value

# Wiki authentication: switched on when DEEPWIKI_AUTH_MODE is 'true', '1' or
# 't' in any letter case; every other value (and the default) leaves it off.
raw_auth_mode = os.getenv('DEEPWIKI_AUTH_MODE', 'False')
WIKI_AUTH_MODE = raw_auth_mode.lower() in ('true', '1', 't')
WIKI_AUTH_CODE = os.getenv('DEEPWIKI_AUTH_CODE', '')

# Embedder backend name, lower-cased; 'openai' unless overridden.
EMBEDDER_TYPE = os.getenv('DEEPWIKI_EMBEDDER_TYPE', 'openai').lower()

# Directory holding the JSON configuration files; None selects the default
# location next to this module.
CONFIG_DIR = os.getenv('DEEPWIKI_CONFIG_DIR')

# Lookup table from the "client_class" names used in the JSON configuration
# files to the client classes imported above.
CLIENT_CLASSES = dict(
    GoogleGenAIClient=GoogleGenAIClient,
    GoogleEmbedderClient=GoogleEmbedderClient,
    OpenAIClient=OpenAIClient,
    OpenRouterClient=OpenRouterClient,
    OllamaClient=OllamaClient,
    BedrockClient=BedrockClient,
    AzureAIClient=AzureAIClient,
    DashscopeClient=DashscopeClient,
)

def replace_env_placeholders(config: Union[Dict[str, Any], List[Any], str, Any]) -> Union[Dict[str, Any], List[Any], str, Any]:
    """
    Substitute "${ENV_VAR}" placeholders with environment variable values.

    The structure is walked recursively: dict values and list items are
    processed, strings have every placeholder replaced, and any other value
    (numbers, booleans, None, ...) is returned untouched. Dicts and lists are
    rebuilt, so the input is not modified. Only names made of upper-case
    letters, digits and underscores are recognised as placeholders.

    A placeholder whose variable is not set is left in the string as is and a
    warning is logged.
    """
    placeholder_re = re.compile(r"\$\{([A-Z0-9_]+)\}")

    def substitute(match: re.Match[str]) -> str:
        placeholder = match.group(0)
        value = os.environ.get(match.group(1))
        if value is not None:
            return value
        logger.warning(
            f"Environment variable placeholder '{placeholder}' was not found in the environment. "
            f"The placeholder string will be used as is."
        )
        return placeholder

    def walk(node):
        # Containers are rebuilt; leaves other than strings pass through.
        if isinstance(node, dict):
            return {name: walk(child) for name, child in node.items()}
        if isinstance(node, list):
            return [walk(child) for child in node]
        if isinstance(node, str):
            return placeholder_re.sub(substitute, node)
        return node

    return walk(config)

# Load JSON configuration file
def load_json_config(filename):
    """
    Load a JSON configuration file and expand its "${ENV_VAR}" placeholders.

    The file is looked up in CONFIG_DIR when that is set, otherwise in the
    "config" directory next to this module.

    Returns:
        The parsed configuration, or an empty dict when the file does not
        exist or any error occurs while reading/parsing it (the problem is
        logged rather than raised).
    """
    try:
        base_dir = Path(CONFIG_DIR) if CONFIG_DIR else Path(__file__).parent / "config"
        config_path = base_dir / filename

        logger.info(f"Loading configuration from {config_path}")

        if not config_path.exists():
            logger.warning(f"Configuration file {config_path} does not exist")
            return {}

        with open(config_path, 'r', encoding='utf-8') as handle:
            parsed = json.load(handle)
        return replace_env_placeholders(parsed)
    except Exception as e:
        logger.error(f"Error loading configuration file {filename}: {str(e)}")
        return {}

# Load generator model configuration
def load_generator_config():
    """
    Load "generator.json" and attach a client class to every provider entry.

    For each provider the "model_client" key is set from its "client_class"
    name when that name is registered in CLIENT_CLASSES; otherwise a built-in
    default keyed by the provider id is used. Providers matching neither get
    no "model_client" and a warning is logged.

    Returns:
        dict: The generator configuration (empty when the file could not be loaded).
    """
    generator_config = load_json_config("generator.json")

    # Defaults used when a provider does not name a registered client class.
    fallback_clients = {
        "google": GoogleGenAIClient,
        "openai": OpenAIClient,
        "openrouter": OpenRouterClient,
        "ollama": OllamaClient,
        "bedrock": BedrockClient,
        "azure": AzureAIClient,
        "dashscope": DashscopeClient
    }

    if "providers" not in generator_config:
        return generator_config

    for provider_id, provider_config in generator_config["providers"].items():
        requested_class = provider_config.get("client_class")
        if requested_class in CLIENT_CLASSES:
            provider_config["model_client"] = CLIENT_CLASSES[requested_class]
        elif provider_id in fallback_clients:
            provider_config["model_client"] = fallback_clients[provider_id]
        else:
            logger.warning(f"Unknown provider or client class: {provider_id}")

    return generator_config

# Load embedder configuration
def load_embedder_config():
    """
    Load "embedder.json" and resolve the client class of each embedder section.

    For the sections "embedder", "embedder_ollama", "embedder_google" and
    "embedder_bedrock", a "client_class" name registered in CLIENT_CLASSES is
    resolved into a "model_client" entry. Other sections are left unchanged.

    Returns:
        dict: The embedder configuration (empty when the file could not be loaded).
    """
    embedder_config = load_json_config("embedder.json")

    embedder_sections = ("embedder", "embedder_ollama", "embedder_google", "embedder_bedrock")
    for section_name in embedder_sections:
        if section_name not in embedder_config:
            continue
        section = embedder_config[section_name]
        if "client_class" not in section:
            continue
        requested_class = section["client_class"]
        if requested_class in CLIENT_CLASSES:
            section["model_client"] = CLIENT_CLASSES[requested_class]

    return embedder_config

def get_embedder_config():
    """
    Return the embedder section selected by DEEPWIKI_EMBEDDER_TYPE.

    The types 'bedrock', 'google' and 'ollama' select their dedicated section
    when it is present in the loaded configuration; in every other case the
    generic "embedder" section is used.

    Returns:
        dict: The selected embedder configuration, or {} when the generic
        section is missing as well.
    """
    dedicated_sections = {
        'bedrock': 'embedder_bedrock',
        'google': 'embedder_google',
        'ollama': 'embedder_ollama',
    }
    section_name = dedicated_sections.get(EMBEDDER_TYPE)
    if section_name is not None and section_name in configs:
        return configs.get(section_name, {})
    return configs.get("embedder", {})

def is_ollama_embedder():
    """
    Tell whether the active embedder configuration uses OllamaClient.

    The resolved "model_client" class is checked by name first; when no
    model client is set, the "client_class" string is compared instead.

    Returns:
        bool: True if using OllamaClient, False otherwise (including when no
        embedder configuration is available)
    """
    target_name = "OllamaClient"
    active_config = get_embedder_config()
    if active_config:
        resolved_client = active_config.get("model_client")
        if resolved_client:
            return resolved_client.__name__ == target_name
        # No resolved class: fall back to the configured class name.
        return active_config.get("client_class", "") == target_name
    return False

def is_google_embedder():
    """
    Tell whether the active embedder configuration uses GoogleEmbedderClient.

    The resolved "model_client" class is checked by name first; when no
    model client is set, the "client_class" string is compared instead.

    Returns:
        bool: True if using GoogleEmbedderClient, False otherwise (including
        when no embedder configuration is available)
    """
    target_name = "GoogleEmbedderClient"
    active_config = get_embedder_config()
    if active_config:
        resolved_client = active_config.get("model_client")
        if resolved_client:
            return resolved_client.__name__ == target_name
        # No resolved class: fall back to the configured class name.
        return active_config.get("client_class", "") == target_name
    return False

def is_bedrock_embedder():
    """
    Tell whether the active embedder configuration uses BedrockClient.

    The resolved "model_client" class is checked by name first; when no
    model client is set, the "client_class" string is compared instead.

    Returns:
        bool: True if using BedrockClient, False otherwise (including when no
        embedder configuration is available)
    """
    target_name = "BedrockClient"
    active_config = get_embedder_config()
    if active_config:
        resolved_client = active_config.get("model_client")
        if resolved_client:
            return resolved_client.__name__ == target_name
        # No resolved class: fall back to the configured class name.
        return active_config.get("client_class", "") == target_name
    return False

def get_embedder_type():
    """
    Name the embedder backend that the active configuration resolves to.

    The checks run in the order bedrock, ollama, google; the first one that
    matches wins.

    Returns:
        str: 'bedrock', 'ollama', 'google', or 'openai' (default)
    """
    detectors = (
        ('bedrock', is_bedrock_embedder),
        ('ollama', is_ollama_embedder),
        ('google', is_google_embedder),
    )
    for type_name, detect in detectors:
        if detect():
            return type_name
    return 'openai'

# Load repository and file filters configuration
def load_repo_config():
    """Load "repo.json" through load_json_config and return the result unchanged."""
    repo_settings = load_json_config("repo.json")
    return repo_settings

# Load language configuration
def load_lang_config():
    """
    Load the language configuration from "lang.json".

    Returns:
        dict: The loaded configuration when it contains both the
        "supported_languages" and "default" keys. The built-in default
        configuration is returned when the file is missing/empty, or (after
        logging a warning) when either key is absent.
    """
    loaded_config = load_json_config("lang.json")  # path resolution and parsing are handled there

    if loaded_config:
        required_keys = ("supported_languages", "default")
        if all(key in loaded_config for key in required_keys):
            return loaded_config
        logger.warning("Language configuration file 'lang.json' is malformed. Using default language configuration.")

    # Built-in fallback used whenever no usable file configuration exists.
    return {
        "supported_languages": {
            "en": "English",
            "ja": "Japanese (日本語)",
            "zh": "Mandarin Chinese (中文)",
            "zh-tw": "Traditional Chinese (繁體中文)",
            "es": "Spanish (Español)",
            "kr": "Korean (한국어)",
            "vi": "Vietnamese (Tiếng Việt)",
            "pt-br": "Brazilian Portuguese (Português Brasileiro)",
            "fr": "Français (French)",
            "ru": "Русский (Russian)"
        },
        "default": "en"
    }

# Default excluded directories and files.
# NOTE(review): how these entries are matched (prefix, exact name, glob) is
# decided by the consuming code, which is not part of this module - confirm
# there before changing the entry format.

# Directory entries, all written relative to the repository root as "./name/".
DEFAULT_EXCLUDED_DIRS: List[str] = [
    # Virtual environments and package managers
    "./.venv/", "./venv/", "./env/", "./virtualenv/",
    "./node_modules/", "./bower_components/", "./jspm_packages/",
    # Version control
    "./.git/", "./.svn/", "./.hg/", "./.bzr/",
    # Cache and compiled files
    "./__pycache__/", "./.pytest_cache/", "./.mypy_cache/", "./.ruff_cache/", "./.coverage/",
    # Build and distribution
    "./dist/", "./build/", "./out/", "./target/", "./bin/", "./obj/",
    # Documentation
    "./docs/", "./_docs/", "./site-docs/", "./_site/",
    # IDE specific
    "./.idea/", "./.vscode/", "./.vs/", "./.eclipse/", "./.settings/",
    # Logs and temporary files
    "./logs/", "./log/", "./tmp/", "./temp/",
]

# File entries: a mix of literal names (e.g. "yarn.lock") and wildcard
# patterns (e.g. "*.min.js"). Several entries are directory-like names
# ("node_modules", "dist", "build", ...) that also appear in the list above.
DEFAULT_EXCLUDED_FILES: List[str] = [
    "yarn.lock", "pnpm-lock.yaml", "npm-shrinkwrap.json", "poetry.lock",
    "Pipfile.lock", "requirements.txt.lock", "Cargo.lock", "composer.lock",
    ".lock", ".DS_Store", "Thumbs.db", "desktop.ini", "*.lnk", ".env",
    ".env.*", "*.env", "*.cfg", "*.ini", ".flaskenv", ".gitignore",
    ".gitattributes", ".gitmodules", ".github", ".gitlab-ci.yml",
    ".prettierrc", ".eslintrc", ".eslintignore", ".stylelintrc",
    ".editorconfig", ".jshintrc", ".pylintrc", ".flake8", "mypy.ini",
    "pyproject.toml", "tsconfig.json", "webpack.config.js", "babel.config.js",
    "rollup.config.js", "jest.config.js", "karma.conf.js", "vite.config.js",
    "next.config.js", "*.min.js", "*.min.css", "*.bundle.js", "*.bundle.css",
    "*.map", "*.gz", "*.zip", "*.tar", "*.tgz", "*.rar", "*.7z", "*.iso",
    "*.dmg", "*.img", "*.msix", "*.appx", "*.appxbundle", "*.xap", "*.ipa",
    "*.deb", "*.rpm", "*.msi", "*.exe", "*.dll", "*.so", "*.dylib", "*.o",
    "*.obj", "*.jar", "*.war", "*.ear", "*.jsm", "*.class", "*.pyc", "*.pyd",
    "*.pyo", "__pycache__", "*.a", "*.lib", "*.lo", "*.la", "*.slo", "*.dSYM",
    "*.egg", "*.egg-info", "*.dist-info", "*.eggs", "node_modules",
    "bower_components", "jspm_packages", "lib-cov", "coverage", "htmlcov",
    ".nyc_output", ".tox", "dist", "build", "bld", "out", "bin", "target",
    "packages/*/dist", "packages/*/build", ".output"
]

# Initialize empty configuration.
# `configs` is the module-wide merged configuration read by get_embedder_config
# and get_model_config; it is filled in below at import time.
configs = {}

# Load all configuration files (each loader returns {} when its file is
# missing or unreadable, except load_lang_config which falls back to defaults)
generator_config = load_generator_config()
embedder_config = load_embedder_config()
repo_config = load_repo_config()
lang_config = load_lang_config()

# Update configuration: provider settings, defaulting the provider to "google"
# when generator.json does not name one
if generator_config:
    configs["default_provider"] = generator_config.get("default_provider", "google")
    configs["providers"] = generator_config.get("providers", {})

# Update embedder configuration: copy only the known sections that are present
if embedder_config:
    for key in ["embedder", "embedder_ollama", "embedder_google", "embedder_bedrock", "retriever", "text_splitter"]:
        if key in embedder_config:
            configs[key] = embedder_config[key]

# Update repository configuration: copy only the known sections that are present
if repo_config:
    for key in ["file_filters", "repository"]:
        if key in repo_config:
            configs[key] = repo_config[key]

# Update language configuration (stored as a whole under "lang_config")
if lang_config:
    configs["lang_config"] = lang_config


def get_model_config(provider="google", model=None):
    """
    Get configuration for the specified provider and model

    Parameters:
        provider (str): Provider id, i.e. a key of the loaded "providers"
            configuration (for example 'google', 'openai', 'openrouter',
            'ollama', 'bedrock', 'azure' or 'dashscope')
        model (str): Model name, or None to use the provider's default model.
            A model name that is not listed for the provider is still used as
            the model, combined with the parameters of the default model.

    Returns:
        dict: {"model_client": <client class>, "model_kwargs": {...}} where
        model_kwargs holds the model name plus its configured parameters

    Raises:
        ValueError: If the provider configuration is not loaded, the provider
            is unknown or has no model client, no default model is available
            when one is needed, or neither the requested model nor the
            default model has an entry under the provider's "models".
    """
    # Get provider configuration
    if "providers" not in configs:
        raise ValueError("Provider configuration not loaded")

    provider_config = configs["providers"].get(provider)
    if not provider_config:
        raise ValueError(f"Configuration for provider '{provider}' not found")

    model_client = provider_config.get("model_client")
    if not model_client:
        raise ValueError(f"Model client not specified for provider '{provider}'")

    # If model not provided, use default model for the provider
    default_model = provider_config.get("default_model")
    if not model:
        model = default_model
        if not model:
            raise ValueError(f"No default model specified for provider '{provider}'")

    # Get model parameters: the model's own entry, else the default model's.
    # A missing "models" section or default entry is reported as a
    # configuration error instead of surfacing as a bare KeyError.
    models = provider_config.get("models") or {}
    if model in models:
        model_params = models[model]
    elif default_model in models:
        model_params = models[default_model]
    else:
        raise ValueError(
            f"No parameters configured for model '{model}' or default model "
            f"'{default_model}' of provider '{provider}'"
        )

    # Provider-specific adjustments
    if provider == "ollama":
        # Ollama keeps its parameters under "options"; only those are forwarded
        model_kwargs = {"model": model, **model_params.get("options", {})} \
            if "options" in model_params else {"model": model}
    else:
        # Standard structure for other providers
        model_kwargs = {"model": model, **model_params}

    return {
        "model_client": model_client,
        "model_kwargs": model_kwargs,
    }


================================================
FILE: api/dashscope_client.py
================================================
"""Dashscope (Alibaba Cloud) ModelClient integration."""

import os
import pickle
from typing import (
    Dict,
    Optional,
    Any,
    Callable,
    Generator,
    Union,
    Literal,
    List,
    Sequence,
)

import logging
import backoff
from copy import deepcopy
from tqdm import tqdm

# optional import
from adalflow.utils.lazy_import import safe_import, OptionalPackages

openai = safe_import(OptionalPackages.OPENAI.value[0], OptionalPackages.OPENAI.value[1])

from openai import OpenAI, AsyncOpenAI, Stream
from openai import (
    APITimeoutError,
    InternalServerError,
    RateLimitError,
    UnprocessableEntityError,
    BadRequestError,
)
from openai.types import (
    Completion,
    CreateEmbeddingResponse,
)
from openai.types.chat import ChatCompletionChunk, ChatCompletion

from adalflow.core.model_client import ModelClient
from adalflow.core.types import (
    ModelType,
    EmbedderOutput,
    CompletionUsage,
    GeneratorOutput,
    Document,
    Embedding,
    EmbedderOutputType,
    EmbedderInputType,
)
from adalflow.core.component import DataComponent
from adalflow.core.embedder import (
    BatchEmbedderOutputType,
    BatchEmbedderInputType,
)
import adalflow.core.functional as F
from adalflow.components.model_client.utils import parse_embedding_response

from api.logging_config import setup_logging

# Optional switch, currently disabled: uncomment to turn off tqdm progress bars.
# os.environ["TQDM_DISABLE"] = "1"

# Run the project's logging setup (api.logging_config.setup_logging) before
# creating the logger used throughout this module.
setup_logging()
log = logging.getLogger(__name__)

def get_first_message_content(completion: ChatCompletion) -> str:
    """Return the content of the first choice's message.

    When the completion has no choices, the first choice has no
    ``message.content``, or anything raises while inspecting it, the problem
    is logged and ``str(completion)`` is returned instead. The extracted
    content is returned as is, so it can be ``None`` if the message carries
    no content.
    """
    log.info(f"🔍 get_first_message_content called with: {type(completion)}")
    log.debug(f"raw completion: {completion}")

    try:
        if not (hasattr(completion, 'choices') and len(completion.choices) > 0):
            log.error("❌ Completion doesn't have choices")
            return str(completion)

        first_choice = completion.choices[0]
        if not (hasattr(first_choice, 'message') and hasattr(first_choice.message, 'content')):
            log.error("❌ Choice doesn't have message.content")
            return str(completion)

        text = first_choice.message.content
        log.info(f"✅ Successfully extracted content: {type(text)}, length: {len(text) if text else 0}")
        return text
    except Exception as e:
        log.error(f"❌ Error in get_first_message_content: {e}")
        return str(completion)


def parse_stream_response(completion: "ChatCompletionChunk") -> Optional[str]:
    """Parse the response of the stream API.

    Returns the text delta of the chunk's first choice, or ``None`` when the
    chunk carries no text. That includes chunks whose ``choices`` list is
    empty (for example a trailing usage-only chunk), which previously raised
    ``IndexError`` and aborted the stream.
    """
    choices = getattr(completion, "choices", None)
    if not choices:
        return None
    delta = getattr(choices[0], "delta", None)
    return getattr(delta, "content", None)


def handle_streaming_response(generator: Stream[ChatCompletionChunk]):
    """Lazily yield the parsed content of every chunk in a streaming response.

    Each raw chunk is logged at debug level and then passed through
    ``parse_stream_response``; whatever that returns is yielded.
    """
    for chunk in generator:
        log.debug(f"Raw chunk completion: {chunk}")
        yield parse_stream_response(chunk)


class DashscopeClient(ModelClient):
    """A component wrapper for the Dashscope (Alibaba Cloud) API client.

    Dashscope provides access to Alibaba Cloud's Qwen and other models through an OpenAI-compatible API.
    
    Args:
        api_key (Optional[str], optional): Dashscope API key. Defaults to None.
        workspace_id (Optional[str], optional): Dashscope workspace ID. Defaults to None.
        base_url (str): The API base URL. Defaults to "https://dashscope.aliyuncs.com/compatible-mode/v1".
        env_api_key_name (str): Environment variable name for the API key. Defaults to "DASHSCOPE_API_KEY".
        env_workspace_id_name (str): Environment variable name for the workspace ID. Defaults to "DASHSCOPE_WORKSPACE_ID".

    References:
        - Dashscope API Documentation: https://help.aliyun.com/zh/dashscope/
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        workspace_id: Optional[str] = None,
        chat_completion_parser: Callable[[Completion], Any] = None,
        input_type: Literal["text", "messages"] = "text",
        base_url: Optional[str] = None,
        env_base_url_name: str = "DASHSCOPE_BASE_URL",
        env_api_key_name: str = "DASHSCOPE_API_KEY",
        env_workspace_id_name: str = "DASHSCOPE_WORKSPACE_ID",
    ):
        """Store the connection settings and create the synchronous client.

        Args:
            api_key: Explicit API key; when None the key is read from the
                environment variable named by ``env_api_key_name``.
            workspace_id: Explicit workspace ID; when None it is read from the
                environment variable named by ``env_workspace_id_name``.
            chat_completion_parser: Accepted for interface compatibility but
                not used: the parser is always ``get_first_message_content``.
            input_type: Stored on the instance as ``_input_type``.
            base_url: API base URL. When falsy, the environment variable named
                by ``env_base_url_name`` is used, and failing that
                "https://dashscope.aliyuncs.com/compatible-mode/v1".
            env_base_url_name: Environment variable holding the base URL.
            env_api_key_name: Environment variable holding the API key.
            env_workspace_id_name: Environment variable holding the workspace ID.

        Raises:
            ValueError: If no API key is available (raised while the
                synchronous client is being created).
        """
        super().__init__()
        self._api_key = api_key
        self._workspace_id = workspace_id
        self._env_api_key_name = env_api_key_name
        self._env_workspace_id_name = env_workspace_id_name
        self._env_base_url_name = env_base_url_name
        self.base_url = base_url or os.getenv(self._env_base_url_name, "https://dashscope.aliyuncs.com/compatible-mode/v1")
        # The credentials and base_url above must be set before this call:
        # init_sync_client reads them through _prepare_client_config.
        self.sync_client = self.init_sync_client()
        # The async client is created on first use by acall.
        self.async_client = None
        
        # Force use of get_first_message_content to ensure string output
        # (the chat_completion_parser argument is intentionally ignored).
        self.chat_completion_parser = get_first_message_content
        self._input_type = input_type
        self._api_kwargs = {}

    def _prepare_client_config(self):
        """
        Private helper method to prepare client configuration.
        
        Returns:
            tuple: (api_key, workspace_id, base_url) for client initialization
        
        Raises:
            ValueError: If API key is not provided
        """
        api_key = self._api_key or os.getenv(self._env_api_key_name)
        workspace_id = self._workspace_id or os.getenv(self._env_workspace_id_name)
        
        if not api_key:
            raise ValueError(
                f"Environment variable {self._env_api_key_name} must be set"
            )
        
        if not workspace_id:
            log.warning(f"Environment variable {self._env_workspace_id_name} not set. Some features may not work properly.")
        
        # For Dashscope, we need to include the workspace ID in the base URL if provided
        base_url = self.base_url
        if workspace_id:
            # Add workspace ID to headers or URL as required by Dashscope
            base_url = f"{self.base_url.rstrip('/')}"
        
        return api_key, workspace_id, base_url

    def init_sync_client(self):
        """Build the synchronous OpenAI client for the resolved key and base URL.

        When a workspace ID is configured it is attached to the client object
        as ``_workspace_id`` so later requests can pick it up.

        Raises:
            ValueError: If no API key is available.
        """
        api_key, workspace_id, base_url = self._prepare_client_config()
        sync_client = OpenAI(api_key=api_key, base_url=base_url)
        if workspace_id:
            # Remembered on the client for use when requests are built.
            sync_client._workspace_id = workspace_id
        return sync_client

    def init_async_client(self):
        """Build the asynchronous OpenAI client for the resolved key and base URL.

        When a workspace ID is configured it is attached to the client object
        as ``_workspace_id`` so later requests can pick it up.

        Raises:
            ValueError: If no API key is available.
        """
        api_key, workspace_id, base_url = self._prepare_client_config()
        async_client = AsyncOpenAI(api_key=api_key, base_url=base_url)
        if workspace_id:
            # Remembered on the client for use when requests are built.
            async_client._workspace_id = workspace_id
        return async_client

    def parse_chat_completion(
        self,
        completion: Union[ChatCompletion, Generator[ChatCompletionChunk, None, None]],
    ) -> "GeneratorOutput":
        """Parse the completion response to a GeneratorOutput.

        Args:
            completion: A non-streaming completion (anything exposing both
                ``choices`` and ``usage``), an iterable of streaming chunks,
                or an already-built GeneratorOutput (returned unchanged).

        Returns:
            GeneratorOutput: ``data`` is always a string. For non-streaming
            responses it is the first choice's message content (or the string
            form of the whole completion when that cannot be extracted); for
            streams it is the concatenation of all text deltas. ``usage`` is
            ``None`` when the response carries no usage information.

        Raises:
            Exception: Any error raised while parsing is logged and re-raised.
        """

        def to_usage(raw_usage):
            # The API may omit usage entirely; keep that as None instead of
            # failing on attribute access.
            if not raw_usage:
                return None
            return CompletionUsage(
                completion_tokens=raw_usage.completion_tokens,
                prompt_tokens=raw_usage.prompt_tokens,
                total_tokens=raw_usage.total_tokens,
            )

        try:
            # If the completion is already a GeneratorOutput, return it directly (prevent recursion)
            if isinstance(completion, GeneratorOutput):
                return completion

            # Check if it's a ChatCompletion object (non-streaming response)
            if hasattr(completion, 'choices') and hasattr(completion, 'usage'):
                try:
                    choices = completion.choices
                    if (choices and
                            hasattr(choices[0], 'message') and
                            hasattr(choices[0].message, 'content')):
                        content = choices[0].message.content
                        parsed_data = content if isinstance(content, str) else str(content)
                    else:
                        # Fallback: convert entire completion to string
                        parsed_data = str(completion)
                except Exception:
                    # Ultimate fallback
                    parsed_data = str(completion)

                return GeneratorOutput(
                    data=parsed_data,
                    usage=to_usage(completion.usage),
                    raw_response=str(completion),
                )

            # Handle streaming response - collect all content parts into a single string
            content_parts = []
            usage_info = None
            for chunk in completion:
                # Usage-only chunks arrive with an empty `choices` list, so
                # never index into it unconditionally.
                chunk_choices = getattr(chunk, 'choices', None)
                if chunk_choices:
                    delta = getattr(chunk_choices[0], 'delta', None)
                    piece = getattr(delta, 'content', None)
                    if piece:
                        content_parts.append(piece)
                # Keep the most recent usage info seen on any chunk
                chunk_usage = getattr(chunk, 'usage', None)
                if chunk_usage:
                    usage_info = chunk_usage

            return GeneratorOutput(
                data=''.join(content_parts),
                usage=to_usage(usage_info),
                raw_response="streaming"
            )
        except Exception as e:
            log.error(f"Error parsing completion: {e}")
            raise

    def track_completion_usage(
        self,
        completion: Union[ChatCompletion, Generator[ChatCompletionChunk, None, None]],
    ) -> CompletionUsage:
        """Track the completion usage.

        Returns:
            CompletionUsage: The token counts reported by a ChatCompletion.
            All-zero counts are returned for streaming responses (usage cannot
            be tracked accurately there) and for completions that carry no
            usage information.
        """
        # `usage` is optional on ChatCompletion, so treat a missing value like
        # the untrackable streaming case instead of raising AttributeError.
        usage = completion.usage if isinstance(completion, ChatCompletion) else None
        if usage is None:
            return CompletionUsage(completion_tokens=0, prompt_tokens=0, total_tokens=0)

        return CompletionUsage(
            completion_tokens=usage.completion_tokens,
            prompt_tokens=usage.prompt_tokens,
            total_tokens=usage.total_tokens,
        )

    def parse_embedding_response(
        self, response: CreateEmbeddingResponse
    ) -> EmbedderOutput:
        """Parse the embedding response to a EmbedderOutput.

        Delegates to the module-level ``parse_embedding_response`` helper and
        logs how many embeddings came back. Any exception is logged and turned
        into an EmbedderOutput with empty data and the error message.
        """
        try:
            parsed = parse_embedding_response(response)
            embeddings = parsed.data
            if not embeddings:
                log.warning(f"🔍 No embedding data found in result")
                return parsed

            log.info(f"🔍 Number of embeddings: {len(embeddings)}")
            if len(embeddings) > 0:
                first = embeddings[0]
                log.info(f"🔍 First embedding length: {len(first.embedding) if hasattr(first, 'embedding') else 'N/A'}")
            return parsed
        except Exception as e:
            log.error(f"🔍 Error parsing DashScope embedding response: {e}")
            log.error(f"🔍 Raw response details: {repr(response)}")
            return EmbedderOutput(data=[], error=str(e), raw_response=response)

    def convert_inputs_to_api_kwargs(
        self,
        input: Optional[Any] = None,
        model_kwargs: Dict = {},
        model_type: ModelType = ModelType.UNDEFINED,
    ) -> Dict:
        """Convert inputs to API kwargs.

        Args:
            input: For LLM calls, a prompt string (sent as a single user
                message) or a ready-made list of messages. For embedder calls,
                a string, an object with a ``text`` attribute, or a list of
                either; anything else is converted with ``str()``.
            model_kwargs: Extra keyword arguments merged into the result. The
                dict and its ``extra_headers`` entry are copied, never
                modified.
            model_type: ModelType.LLM or ModelType.EMBEDDER.

        Returns:
            Dict: The request kwargs. When a workspace ID is known on either
            client, it is added as the ``X-DashScope-WorkSpace`` extra header.

        Raises:
            ValueError: For an unsupported LLM input type or model type.
        """

        def with_workspace_header(kwargs: Dict) -> Dict:
            workspace_id = getattr(self.sync_client, '_workspace_id', None) or getattr(self.async_client, '_workspace_id', None)
            if workspace_id:
                # Copy the headers first: `kwargs` is only a shallow copy of
                # model_kwargs, so the nested dict still belongs to the caller.
                headers = dict(kwargs.get('extra_headers') or {})
                headers['X-DashScope-WorkSpace'] = workspace_id
                kwargs['extra_headers'] = headers
            return kwargs

        def as_text(item: Any) -> str:
            # Objects exposing `.text` (Documents) contribute that text;
            # strings pass through; everything else is stringified.
            if hasattr(item, 'text'):
                return item.text
            if isinstance(item, str):
                return item
            return str(item)

        final_model_kwargs = model_kwargs.copy()

        if model_type == ModelType.LLM:
            if isinstance(input, str):
                messages = [{"role": "user", "content": input}]
            elif isinstance(input, list):
                messages = input
            else:
                raise ValueError(f"Unsupported input type: {type(input)}")

            return with_workspace_header({"messages": messages, **final_model_kwargs})

        if model_type == ModelType.EMBEDDER:
            if isinstance(input, list):
                processed_input = [as_text(item) for item in input]
            else:
                processed_input = as_text(input)

            return with_workspace_header({"input": processed_input, **final_model_kwargs})

        raise ValueError(f"model_type {model_type} is not supported")

    @backoff.on_exception(
        backoff.expo,
        (
            APITimeoutError,
            InternalServerError,
            RateLimitError,
            UnprocessableEntityError,
            BadRequestError,
        ),
        max_time=5,
    )
    def call(self, api_kwargs: Dict = {}, model_type: ModelType = ModelType.UNDEFINED):
        """Call the Dashscope API.

        Args:
            api_kwargs: Request kwargs as produced by
                ``convert_inputs_to_api_kwargs``. The dict is not modified.
            model_type: ModelType.LLM or ModelType.EMBEDDER.

        Returns:
            For LLM calls: a generator of text deltas when ``stream`` is set,
            otherwise the parsed GeneratorOutput. For embedder calls: an
            EmbedderOutput. Empty or non-string inputs are not sent to the
            API; their positions are filled with zero vectors so the output
            lines up with the input. Embedding failures are reported through
            ``EmbedderOutput.error`` rather than raised.

        Raises:
            ValueError: If ``model_type`` is not supported.
        """
        # Work on a copy so neither the caller's dict nor the shared default
        # `{}` is mutated (and changes do not leak between retries).
        api_kwargs = dict(api_kwargs)

        if model_type == ModelType.LLM:
            streaming = api_kwargs.get("stream", False)
            if not streaming:
                # For non-streaming, enable_thinking must be false.
                # Pass it via extra_body to avoid TypeError from openai client validation.
                extra_body = dict(api_kwargs.get("extra_body") or {})
                extra_body["enable_thinking"] = False
                api_kwargs["extra_body"] = extra_body

            completion = self.sync_client.chat.completions.create(**api_kwargs)

            if streaming:
                return handle_streaming_response(completion)
            return self.parse_chat_completion(completion)

        if model_type == ModelType.EMBEDDER:
            # Extract input texts from api_kwargs
            texts = api_kwargs.get("input", [])

            if not texts:
                log.warning("😭 No input texts provided")
                return EmbedderOutput(data=[], error="No input texts provided", raw_response=None)

            # Ensure texts is a list
            if isinstance(texts, str):
                texts = [texts]

            # Filter out empty or None texts - following HuggingFace client pattern
            valid_texts = []
            valid_indices = []
            for i, text in enumerate(texts):
                if text and isinstance(text, str) and text.strip():
                    valid_texts.append(text)
                    valid_indices.append(i)
                else:
                    log.warning(f"🔍 Skipping empty or invalid text at index {i}: type={type(text)}, length={len(text) if hasattr(text, '__len__') else 'N/A'}, repr={repr(text)[:100]}")

            if not valid_texts:
                log.error("😭 No valid texts found after filtering")
                return EmbedderOutput(data=[], error="No valid texts found after filtering", raw_response=None)

            if len(valid_texts) != len(texts):
                filtered_count = len(texts) - len(valid_texts)
                log.warning(f"🔍 Filtered out {filtered_count} empty/invalid texts out of {len(texts)} total texts")

            # Create modified api_kwargs with only valid texts
            filtered_api_kwargs = dict(api_kwargs)
            filtered_api_kwargs["input"] = valid_texts

            log.info(f"🔍 DashScope embedding API call with {len(valid_texts)} valid texts out of {len(texts)} total")

            try:
                response = self.sync_client.embeddings.create(**filtered_api_kwargs)
                log.info(f"🔍 DashScope API call successful, response type: {type(response)}")
                result = self.parse_embedding_response(response)

                # Nothing was filtered: the result already lines up with the input
                if len(valid_texts) == len(texts):
                    return result

                # Parsing failed or produced nothing: return that outcome as is
                # so its error is not replaced by a secondary failure while padding.
                if getattr(result, "error", None) or not result.data:
                    return result

                log.info(f"🔍 Creating embeddings for {len(texts)} original positions")

                # Get the correct embedding dimension from the first valid embedding
                embedding_dim = None  # Must be determined from a successful response
                if hasattr(result.data[0], 'embedding'):
                    embedding_dim = len(result.data[0].embedding)
                    log.info(f"🔍 Using embedding dimension: {embedding_dim}")

                valid_positions = set(valid_indices)  # O(1) membership checks
                final_data = []
                valid_idx = 0
                for i in range(len(texts)):
                    if i in valid_positions:
                        # Use the embedding from valid texts
                        final_data.append(result.data[valid_idx])
                        valid_idx += 1
                    else:
                        # Create zero embedding for filtered texts with correct dimension
                        log.warning(f"🔍 Creating zero embedding for filtered text at index {i}")
                        final_data.append(Embedding(
                            embedding=[0.0] * embedding_dim,  # Use correct embedding dimension
                            index=i
                        ))

                return EmbedderOutput(
                    data=final_data,
                    error=None,
                    raw_response=result.raw_response
                )

            except Exception as e:
                log.error(f"🔍 DashScope API call failed: {e}")
                return EmbedderOutput(data=[], error=str(e), raw_response=None)

        raise ValueError(f"model_type {model_type} is not supported")

    @backoff.on_exception(
        backoff.expo,
        (
            APITimeoutError,
            InternalServerError,
            RateLimitError,
            UnprocessableEntityError,
            BadRequestError,
        ),
        max_time=5,
    )
    async def acall(
        self, api_kwargs: Dict = {}, model_type: ModelType = ModelType.UNDEFINED
    ):
        """Async call to the Dashscope API.

        The call is retried with exponential backoff (for at most 5 seconds)
        when one of the exception types listed in the decorator escapes this
        method. Exceptions raised by the embedding request are caught below
        and returned as an error ``EmbedderOutput`` instead, so they do not
        trigger a retry.

        Args:
            api_kwargs (Dict): Keyword arguments forwarded to the async client.
                The dictionary passed by the caller is never modified.
            model_type (ModelType): ``ModelType.LLM`` for chat completions or
                ``ModelType.EMBEDDER`` for embeddings.

        Returns:
            * LLM with ``stream`` truthy: an async generator that yields every
              truthy value returned by ``parse_stream_response`` per chunk.
            * LLM without streaming: the result of ``parse_chat_completion``.
            * EMBEDDER: an ``EmbedderOutput``. Empty/invalid inputs are not
              sent to the API; their positions are filled with zero vectors.

        Raises:
            ValueError: If ``model_type`` is neither LLM nor EMBEDDER.
        """
        if not self.async_client:
            self.async_client = self.init_async_client()

        if model_type == ModelType.LLM:
            # Work on copies: the original mutated the caller's dict (and the
            # shared ``{}`` default argument) when injecting ``extra_body``.
            api_kwargs = dict(api_kwargs)
            if not api_kwargs.get("stream", False):
                # For non-streaming, enable_thinking must be false.
                extra_body = dict(api_kwargs.get("extra_body") or {})
                extra_body["enable_thinking"] = False
                api_kwargs["extra_body"] = extra_body

            completion = await self.async_client.chat.completions.create(**api_kwargs)

            # For async calls with streaming enabled, wrap the AsyncStream
            # into an async generator of parsed chunks so that callers
            # can simply `async for text in response`.
            if api_kwargs.get("stream", False):

                async def async_stream_generator():
                    async for chunk in completion:
                        log.debug(f"Raw async chunk completion: {chunk}")
                        try:
                            parsed_content = parse_stream_response(chunk)
                        except Exception as e:
                            # A single malformed chunk must not abort the stream.
                            log.error(f"Error parsing async stream chunk: {e}")
                            parsed_content = None
                        if parsed_content:
                            yield parsed_content

                return async_stream_generator()
            else:
                return self.parse_chat_completion(completion)
        elif model_type == ModelType.EMBEDDER:
            # Extract input texts from api_kwargs
            texts = api_kwargs.get("input", [])

            if not texts:
                log.warning("😭 No input texts provided")
                return EmbedderOutput(data=[], error="No input texts provided", raw_response=None)

            # Ensure texts is a list
            if isinstance(texts, str):
                texts = [texts]

            # Filter out empty or None texts - following HuggingFace client pattern
            valid_texts = []
            valid_indices = set()  # set: O(1) membership test when re-expanding below
            for i, text in enumerate(texts):
                if text and isinstance(text, str) and text.strip():
                    valid_texts.append(text)
                    valid_indices.add(i)
                else:
                    log.warning(f"🔍 Skipping empty or invalid text at index {i}: type={type(text)}, length={len(text) if hasattr(text, '__len__') else 'N/A'}, repr={repr(text)[:100]}")

            if not valid_texts:
                log.error("😭 No valid texts found after filtering")
                return EmbedderOutput(data=[], error="No valid texts found after filtering", raw_response=None)

            if len(valid_texts) != len(texts):
                filtered_count = len(texts) - len(valid_texts)
                log.warning(f"🔍 Filtered out {filtered_count} empty/invalid texts out of {len(texts)} total texts")

            # Create modified api_kwargs with only valid texts
            filtered_api_kwargs = api_kwargs.copy()
            filtered_api_kwargs["input"] = valid_texts

            log.info(f"🔍 DashScope async embedding API call with {len(valid_texts)} valid texts out of {len(texts)} total")

            try:
                response = await self.async_client.embeddings.create(**filtered_api_kwargs)
                log.info(f"🔍 DashScope async API call successful, response type: {type(response)}")
                result = self.parse_embedding_response(response)

                # If we filtered texts, we need to create embeddings for the original indices
                if len(valid_texts) != len(texts):
                    if result.error:
                        # Surface the real parsing error instead of masking it
                        # with an IndexError raised while re-expanding.
                        return result

                    log.info(f"🔍 Creating embeddings for {len(texts)} original positions")

                    # Get the correct embedding dimension from the first valid embedding
                    embedding_dim = 256  # Default fallback based on config
                    if result.data and len(result.data) > 0 and hasattr(result.data[0], 'embedding'):
                        embedding_dim = len(result.data[0].embedding)
                        log.info(f"🔍 Using embedding dimension: {embedding_dim}")

                    final_data = []
                    valid_idx = 0
                    for i in range(len(texts)):
                        if i in valid_indices:
                            # Use the embedding from valid texts
                            final_data.append(result.data[valid_idx])
                            valid_idx += 1
                        else:
                            # Create zero embedding for filtered texts with correct dimension
                            log.warning(f"🔍 Creating zero embedding for filtered text at index {i}")
                            final_data.append(Embedding(
                                embedding=[0.0] * embedding_dim,  # Use correct embedding dimension
                                index=i
                            ))

                    result = EmbedderOutput(
                        data=final_data,
                        error=None,
                        raw_response=result.raw_response
                    )

                return result

            except Exception as e:
                log.error(f"🔍 DashScope async API call failed: {e}")
                return EmbedderOutput(data=[], error=str(e), raw_response=None)
        else:
            raise ValueError(f"model_type {model_type} is not supported")

    @classmethod
    def from_dict(cls, data: Dict[str, Any]):
        """Build an instance by passing the entries of ``data`` to the constructor as keyword arguments."""
        instance = cls(**data)
        return instance

    def to_dict(self) -> Dict[str, Any]:
        """Return the client's constructor-level settings as a plain dictionary.

        The keys are ``api_key``, ``workspace_id``, ``base_url`` and
        ``input_type``, in that order.
        """
        settings: Dict[str, Any] = {}
        settings["api_key"] = self._api_key
        settings["workspace_id"] = self._workspace_id
        settings["base_url"] = self.base_url
        settings["input_type"] = self._input_type
        return settings

    def __getstate__(self):
        """
        Customize serialization to exclude non-picklable client objects.
        This method is called by pickle when saving the object's state.
        """
        state = self.__dict__.copy()
        # Remove the unpicklable client instances
        if 'sync_client' in state:
            del state['sync_client']
        if 'async_client' in state:
            del state['async_client']
        return state

    def __setstate__(self, state):
        """
        Customize deserialization to re-create the client objects.
        This method is called by pickle when loading the object's state.
        """
        self.__dict__.update(state)
        # Re-initialize the clients after unpickling
        self.sync_client = self.init_sync_client()
        self.async_client = None  # It will be lazily initialized when acall is used


class DashScopeEmbedder(DataComponent):
    r"""
    A user-facing component that orchestrates an embedder model via the DashScope model client and output processors.

    Args:
        model_client (ModelClient): The DashScope model client to use for the embedder.
        model_kwargs (Dict[str, Any], optional): The model kwargs to pass to the model client. Defaults to {}.
        output_processors (Optional[Component], optional): The output processors after model call. Defaults to None.
    """

    model_type: ModelType = ModelType.EMBEDDER
    model_client: ModelClient
    output_processors: Optional[DataComponent]

    def __init__(
        self,
        *,
        model_client: ModelClient,
        model_kwargs: Dict[str, Any] = {},
        output_processors: Optional[DataComponent] = None,
    ) -> None:
        """Validate the arguments and store them on the instance.

        Raises:
            TypeError: If ``model_kwargs`` is not a dictionary or
                ``model_client`` is not a ``ModelClient``.
        """
        super().__init__(model_kwargs=model_kwargs)
        if not isinstance(model_kwargs, Dict):
            raise TypeError(
                f"{type(self).__name__} requires a dictionary for model_kwargs, not a string"
            )
        # Copy so later changes to the caller's dict do not leak into this instance.
        self.model_kwargs = model_kwargs.copy()

        if not isinstance(model_client, ModelClient):
            raise TypeError(
                f"{type(self).__name__} requires a ModelClient instance for model_client."
            )
        self.model_client = model_client
        self.output_processors = output_processors

    def call(
        self,
        input: EmbedderInputType,
        model_kwargs: Optional[Dict] = {},
    ) -> EmbedderOutputType:
        """Embed ``input`` synchronously.

        Args:
            input: A text or list of texts to embed.
            model_kwargs: Per-call overrides merged over the instance's
                ``model_kwargs``. ``None`` is treated as no overrides.

        Returns:
            The output of the model client's ``call``. If that call raises,
            an ``EmbedderOutput`` carrying the error message is returned
            instead of propagating the exception.
        """
        log.debug(f"Calling {self.__class__.__name__} with input: {input}")
        api_kwargs = self.model_client.convert_inputs_to_api_kwargs(
            input=input,
            # The parameter is Optional, so guard against ``**None``.
            model_kwargs=self._compose_model_kwargs(**(model_kwargs or {})),
            model_type=self.model_type,
        )
        try:
            output = self.model_client.call(
                api_kwargs=api_kwargs, model_type=self.model_type
            )
        except Exception as e:
            log.error(f"🤡 Error calling the DashScope model: {e}")
            output = EmbedderOutput(error=str(e))
        return output

    async def acall(
        self,
        input: EmbedderInputType,
        model_kwargs: Optional[Dict] = {},
    ) -> EmbedderOutputType:
        """Embed ``input`` asynchronously.

        Args:
            input: A text or list of texts to embed.
            model_kwargs: Per-call overrides merged over the instance's
                ``model_kwargs``. ``None`` is treated as no overrides.

        Returns:
            An ``EmbedderOutput`` whose ``input`` attribute is set to the list
            of input texts. Failures are reported through its ``error`` field.
        """
        log.debug(f"Calling {self.__class__.__name__} with input: {input}")
        api_kwargs = self.model_client.convert_inputs_to_api_kwargs(
            input=input,
            # The parameter is Optional, so guard against ``**None``.
            model_kwargs=self._compose_model_kwargs(**(model_kwargs or {})),
            model_type=self.model_type,
        )
        output: EmbedderOutputType = None
        try:
            response = await self.model_client.acall(
                api_kwargs=api_kwargs, model_type=self.model_type
            )
            if isinstance(response, EmbedderOutput):
                # The client already parsed the response; parsing it a second
                # time would treat the parsed output as a raw API response.
                output = response
            else:
                output = self.model_client.parse_embedding_response(response)
        except Exception as e:
            log.error(f"Error calling the DashScope model: {e}")
            output = EmbedderOutput(error=str(e))

        output.input = [input] if isinstance(input, str) else input
        log.debug(f"Output from {self.__class__.__name__}: {output}")
        return output

    def _compose_model_kwargs(self, **model_kwargs) -> Dict[str, object]:
        """Combine the instance's ``model_kwargs`` with per-call overrides via ``F.compose_model_kwargs``."""
        return F.compose_model_kwargs(self.model_kwargs, model_kwargs)

# Batch Embedding Components for DashScope
class DashScopeBatchEmbedder(DataComponent):
    """Batch embedder specifically designed for DashScope API"""

    def __init__(self, embedder, batch_size: int = 100, embedding_cache_file_name: str = "default") -> None:
        """Store the embedder, clamp the batch size to 25 and derive the cache path.

        Args:
            embedder: Callable embedder invoked once per batch with
                ``input=`` and ``model_kwargs=`` keyword arguments.
            batch_size: Requested batch size; values above 25 are reduced to 25.
            embedding_cache_file_name: Prefix of the pickle cache file created
                under ``./embedding_cache``.
        """
        super().__init__(batch_size=batch_size)
        self.embedder = embedder
        self.batch_size = batch_size
        if self.batch_size > 25:
            log.warning(f"DashScope batch embedder initialization, batch size: {self.batch_size}, note that DashScope batch embedding size cannot exceed 25, automatically set to 25")
            self.batch_size = 25
        self.cache_path = f'./embedding_cache/{embedding_cache_file_name}_{self.embedder.__class__.__name__}_dashscope_embeddings.pkl'

    def call(
        self, input: BatchEmbedderInputType, model_kwargs: Optional[Dict] = {}, force_recreate: bool = False
    ) -> BatchEmbedderOutputType:
        """
        Batch call to DashScope embedder

        Args:
            input: List of input texts
            model_kwargs: Model parameters
            force_recreate: Whether to ignore an existing cache file and embed again

        Returns:
            A list with one ``EmbedderOutput`` per batch, in batch order. A batch
            whose embedder call raised is represented by an output with
            ``error`` set and empty ``data``.
        """
        # Check cache first.
        # NOTE(review): the cache is looked up by file name only, not by the
        # content of ``input``; callers must pick a distinct
        # ``embedding_cache_file_name`` per data set or pass force_recreate.
        if not force_recreate and os.path.exists(self.cache_path):
            try:
                # NOTE(security): pickle.load can execute arbitrary code; the
                # cache directory must only contain files written by this process.
                with open(self.cache_path, 'rb') as f:
                    embeddings = pickle.load(f)
                    log.info(f"Loaded cached DashScope embeddings from: {self.cache_path}")
                return embeddings
            except Exception as e:
                log.warning(f"Failed to load cache file {self.cache_path}: {e}, proceeding with fresh embedding")

        if isinstance(input, str):
            input = [input]

        n = len(input)
        embeddings: List[EmbedderOutput] = []

        log.info(f"Starting DashScope batch embedding processing, total {n} texts, batch size: {self.batch_size}")

        for i in tqdm(
            range(0, n, self.batch_size),
            desc="DashScope batch embedding",
            disable=False,
        ):
            batch_input = input[i : min(i + self.batch_size, n)]
            batch_number = i // self.batch_size + 1

            try:
                # Use correct calling method: directly call embedder instance
                batch_output = self.embedder(
                    input=batch_input, model_kwargs=model_kwargs
                )
                embeddings.append(batch_output)

                # Validate batch output
                if batch_output.error:
                    log.error(f"Batch {batch_number} embedding failed: {batch_output.error}")
                elif batch_output.data:
                    log.debug(f"Batch {batch_number} successfully generated {len(batch_output.data)} embedding vectors")
                else:
                    log.warning(f"Batch {batch_number} returned no embedding data")

            except Exception as e:
                log.error(f"Batch {batch_number} processing exception: {e}")
                # Keep one entry per batch so positions still line up with the input.
                error_output = EmbedderOutput(
                    data=[],
                    error=str(e),
                    raw_response=None
                )
                embeddings.append(error_output)

        log.info(f"DashScope batch embedding completed, processed {len(embeddings)} batches")

        # Do not persist failed batches: a cached failure would be replayed on
        # every later call (until force_recreate) without retrying the API.
        failed_batches = sum(1 for batch_output in embeddings if batch_output.error)
        if failed_batches:
            log.warning(f"Skipping DashScope embeddings cache save: {failed_batches} of {len(embeddings)} batches failed")
            return embeddings

        # Save to cache (best effort: a failure here must not lose the embeddings)
        try:
            os.makedirs(os.path.dirname(self.cache_path), exist_ok=True)
            with open(self.cache_path, 'wb') as f:
                pickle.dump(embeddings, f)
                log.info(f"Saved DashScope embeddings cache to: {self.cache_path}")
        except Exception as e:
            log.warning(f"Failed to save cache to {self.cache_path}: {e}")

        return embeddings

    def __call__(self, input: BatchEmbedderInputType, model_kwargs: Optional[Dict] = {}, force_recreate: bool = False) -> BatchEmbedderOutputType:
        """
        Call operator interface, delegates to call method
        """
        return self.call(input=input, model_kwargs=model_kwargs, force_recreate=force_recreate)


class DashScopeToEmbeddings(DataComponent):
    """Component that converts document sequences to embedding vector sequences, specifically optimized for DashScope API"""

    def __init__(self, embedder, batch_size: int = 100, force_recreate_db: bool = False, embedding_cache_file_name: str = "default") -> None:
        """Wrap ``embedder`` in a ``DashScopeBatchEmbedder``.

        Args:
            embedder: Embedder passed through to the batch embedder.
            batch_size: Requested batch size (the batch embedder may reduce it).
            force_recreate_db: Forwarded as ``force_recreate`` on every call.
            embedding_cache_file_name: Cache file prefix for the batch embedder.
        """
        super().__init__(batch_size=batch_size)
        self.embedder = embedder
        self.batch_size = batch_size
        self.batch_embedder = DashScopeBatchEmbedder(embedder=embedder, batch_size=batch_size, embedding_cache_file_name=embedding_cache_file_name)
        self.force_recreate_db = force_recreate_db

    def __call__(self, input: List[Document]) -> List[Document]:
        """
        Process list of documents, generating embedding vectors for each document

        Args:
            input: List of input documents

        Returns:
            A deep copy of ``input`` in which each document's ``vector`` holds
            its embedding, or ``[]`` when no embedding could be produced for it.
        """
        output = deepcopy(input)

        # Convert to text list
        embedder_input: List[str] = [chunk.text for chunk in output]

        log.info(f"Starting to process embeddings for {len(embedder_input)} documents")

        # Batch process embeddings
        outputs: List[EmbedderOutput] = self.batch_embedder(
            input=embedder_input,
            force_recreate=self.force_recreate_db
        )

        # Validate output
        total_embeddings = 0
        error_batches = 0

        for batch_output in outputs:
            if batch_output.error:
                error_batches += 1
                log.error(f"Found error batch: {batch_output.error}")
            elif batch_output.data:
                total_embeddings += len(batch_output.data)

        log.info(f"Embedding statistics: total {total_embeddings} valid embeddings, {error_batches} error batches")

        # Assign embedding vectors back to documents.
        # Use the batch size the batch embedder actually applied: it clamps the
        # requested size, so ``self.batch_size`` can be larger than a real
        # batch and would shift every document after a failed batch.
        effective_batch_size = self.batch_embedder.batch_size
        total_docs = len(output)

        for batch_idx, batch_output in tqdm(
            enumerate(outputs),
            desc="Assigning embedding vectors to documents",
            disable=False
        ):
            # Derive the document range from the batch position so that one
            # bad batch cannot misalign the batches that follow it.
            start = batch_idx * effective_batch_size
            if start >= total_docs:
                # More batch outputs than documents (e.g. a stale cache).
                break
            end = min(start + effective_batch_size, total_docs)

            if batch_output.error:
                # Create empty vectors for documents in error batches
                log.warning(f"Creating empty vectors for {end - start} documents in batch {batch_idx}")
                for doc_idx in range(start, end):
                    output[doc_idx].vector = []
                continue

            # Assign normal embedding vectors
            batch_embeddings = batch_output.data or []
            for offset, doc_idx in enumerate(range(start, end)):
                if offset >= len(batch_embeddings):
                    log.warning(f"Missing embedding for document {doc_idx}")
                    output[doc_idx].vector = []
                    continue
                embedding = batch_embeddings[offset]
                if hasattr(embedding, 'embedding'):
                    output[doc_idx].vector = embedding.embedding
                else:
                    log.warning(f"Invalid embedding format for document {doc_idx}")
                    output[doc_idx].vector = []

        # Validate results
        valid_count = 0
        empty_count = 0

        for doc in output:
            if hasattr(doc, 'vector') and doc.vector and len(doc.vector) > 0:
                valid_count += 1
            else:
                empty_count += 1

        log.info(f"Embedding results: {valid_count} valid vectors, {empty_count} empty vectors")

        if valid_count == 0:
            log.error("❌ All documents have empty embedding vectors!")
        elif empty_count > 0:
            log.warning(f"⚠️ Found {empty_count} empty embedding vectors")
        else:
            log.info("✅ All documents successfully generated embedding vectors")

        return output

    def _extra_repr(self) -> str:
        """Return the extra text shown in the component's repr."""
        return f"batch_size={self.batch_size}"

================================================
FILE: api/data_pipeline.py
================================================
import adalflow as adal
from adalflow.core.types import Document, List
from adalflow.components.data_process import TextSplitter, ToEmbeddings
import os
import subprocess
import json
import tiktoken
import logging
import base64
import glob
from adalflow.utils import get_adalflow_default_root_path
from adalflow.core.db import LocalDB
from api.config import configs, DEFAULT_EXCLUDED_DIRS, DEFAULT_EXCLUDED_FILES
from api.ollama_patch import OllamaDocumentProcessor
from urllib.parse import urlparse, urlunparse, quote
import requests
from requests.exceptions import RequestException

from api.tools.embedder import get_embedder

# Module-level logger, named after this module
logger = logging.getLogger(__name__)

# Maximum token limit for OpenAI embedding models
MAX_EMBEDDING_TOKENS = 8192

def count_tokens(text: str, embedder_type: str = None, is_ollama_embedder: bool = None) -> int:
    """
    Return the number of tiktoken tokens in ``text``.

    The embedder type selects the encoding: 'ollama', 'google' and 'bedrock'
    are counted with ``cl100k_base``; any other value uses the encoding of the
    OpenAI ``text-embedding-3-small`` model.

    Args:
        text (str): The text to count tokens for.
        embedder_type (str, optional): The embedder type ('openai', 'google', 'ollama', 'bedrock').
                                     If None, will be determined from configuration.
        is_ollama_embedder (bool, optional): DEPRECATED. Use embedder_type instead.
                                           Only consulted when embedder_type is None.

    Returns:
        int: The token count, or ``len(text) // 4`` if counting with tiktoken
        fails for any reason.
    """
    # Embedder types that are estimated with the generic GPT-like encoding.
    cl100k_types = ('ollama', 'google', 'bedrock')

    try:
        resolved_type = embedder_type

        # Legacy flag: True maps to 'ollama', False falls through to the config lookup.
        if resolved_type is None and is_ollama_embedder is not None:
            resolved_type = 'ollama' if is_ollama_embedder else None

        # Nothing specified by the caller: ask the configuration.
        if resolved_type is None:
            from api.config import get_embedder_type
            resolved_type = get_embedder_type()

        if resolved_type in cl100k_types:
            encoding = tiktoken.get_encoding("cl100k_base")
        else:
            # OpenAI or default
            encoding = tiktoken.encoding_for_model("text-embedding-3-small")

        token_ids = encoding.encode(text)
        return len(token_ids)
    except Exception as e:
        logger.warning(f"Error counting tokens with tiktoken: {e}")
        # Rough approximation: 4 characters per token
        approximate_count = len(text) // 4
        return approximate_count

def download_repo(repo_url: str, local_path: str, repo_type: str = None, access_token: str = None) -> str:
    """
    Downloads a Git repository (GitHub, GitLab, or Bitbucket) to a specified local path.

    The clone is shallow (``--depth=1 --single-branch``). If ``local_path``
    already exists and is not empty, nothing is cloned and the existing
    directory is used as is.

    Args:
        repo_url (str): The URL of the Git repository to clone.
        local_path (str): The local directory where the repository will be cloned.
        repo_type (str, optional): 'github', 'gitlab' or 'bitbucket'. Selects how
            the access token is embedded in the clone URL; for any other value
            the token is not added to the URL.
        access_token (str, optional): Access token for private repositories.

    Returns:
        str: The standard output of ``git clone``, or a message stating that
        the existing repository is being used.

    Raises:
        ValueError: If a git command fails (the message has the access token
            masked) or any other error occurs.
    """
    try:
        # Check if Git is installed
        logger.info(f"Preparing to clone repository to {local_path}")
        subprocess.run(
            ["git", "--version"],
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )

        # Check if repository already exists
        if os.path.exists(local_path) and os.listdir(local_path):
            # Directory exists and is not empty
            logger.warning(f"Repository already exists at {local_path}. Using existing repository.")
            return f"Using existing repository at {local_path}"

        # Ensure the local path exists
        os.makedirs(local_path, exist_ok=True)

        # Prepare the clone URL with access token if provided
        clone_url = repo_url
        if access_token:
            parsed = urlparse(repo_url)
            # URL-encode the token to handle special characters
            encoded_token = quote(access_token, safe='')
            # Drop credentials already present in the URL; otherwise the result
            # would be the malformed "token@user@host".
            host = parsed.netloc.rsplit('@', 1)[-1]

            # Determine the repository type and format the credentials accordingly
            userinfo = None
            if repo_type == "github":
                # Format: https://{token}@{domain}/owner/repo.git
                # Works for both github.com and enterprise GitHub domains
                userinfo = encoded_token
            elif repo_type == "gitlab":
                # Format: https://oauth2:{token}@gitlab.com/owner/repo.git
                userinfo = f"oauth2:{encoded_token}"
            elif repo_type == "bitbucket":
                # Format: https://x-token-auth:{token}@bitbucket.org/owner/repo.git
                userinfo = f"x-token-auth:{encoded_token}"

            if userinfo is not None:
                clone_url = urlunparse((parsed.scheme, f"{userinfo}@{host}", parsed.path, '', '', ''))

            logger.info("Using access token for authentication")

        # Clone the repository
        # We use repo_url in the log to avoid exposing the token in logs
        logger.info(f"Cloning repository from {repo_url} to {local_path}")
        # NOTE(security): the token is part of the git command line and is
        # therefore visible to other local users who can list processes.
        # "--" ends option parsing so a URL that starts with "-" cannot be
        # interpreted by git as a command-line option.
        result = subprocess.run(
            ["git", "clone", "--depth=1", "--single-branch", "--", clone_url, local_path],
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )

        logger.info("Repository cloned successfully")
        return result.stdout.decode("utf-8")

    except subprocess.CalledProcessError as e:
        # errors="replace": never let undecodable git output hide the real failure.
        error_msg = (e.stderr or b"").decode('utf-8', errors='replace')
        # Sanitize error message to remove any tokens (both raw and URL-encoded)
        if access_token:
            # Remove raw token
            error_msg = error_msg.replace(access_token, "***TOKEN***")
            # Also remove URL-encoded token to prevent leaking encoded version
            encoded_token = quote(access_token, safe='')
            error_msg = error_msg.replace(encoded_token, "***TOKEN***")
        # "from None": the original exception's command list contains the token.
        raise ValueError(f"Error during cloning: {error_msg}") from None
    except Exception as e:
        raise ValueError(f"An unexpected error occurred: {str(e)}") from e

# Alias for backward compatibility: the older name `download_github_repo`
# refers to the same function object as `download_repo`.
download_github_repo = download_repo

def read_all_documents(path: str, embedder_type: str = None, is_ollama_embedder: bool = None,
                      excluded_dirs: List[str] = None, excluded_files: List[str] = None,
                      included_dirs: List[str] = None, included_files: List[str] = None):
    """
    Recursively reads all documents in a directory and its subdirectories.

    Code files are collected first, followed by documentation files. Filtering runs
    in one of two modes: when ``included_dirs`` or ``included_files`` is non-empty,
    only matching files are read (inclusion mode); otherwise the default, configured
    and explicitly passed exclusions are combined (exclusion mode).

    Directory rules are compared against the components of each file's path
    relative to ``path``, so the location of the repository on disk does not
    influence filtering.

    Args:
        path (str): The root directory path.
        embedder_type (str, optional): The embedder type ('openai', 'google', 'ollama').
                                     If None, will be determined from configuration.
        is_ollama_embedder (bool, optional): DEPRECATED. Use embedder_type instead.
                                           If None, will be determined from configuration.
        excluded_dirs (List[str], optional): Directories to exclude from processing.
            Added to the default and configured exclusions (exclusion mode only).
        excluded_files (List[str], optional): File names to exclude from processing.
            Added to the default and configured exclusions (exclusion mode only).
        included_dirs (List[str], optional): List of directories to include exclusively.
            When provided, only files in these directories will be processed.
        included_files (List[str], optional): List of file names or suffixes to include
            exclusively. When provided, only files matching them will be processed.

    Returns:
        list: A list of Document objects with metadata.
    """
    # Handle backward compatibility
    if embedder_type is None and is_ollama_embedder is not None:
        embedder_type = 'ollama' if is_ollama_embedder else None
    documents = []
    # File extensions to look for, prioritizing code files
    code_extensions = [".py", ".js", ".ts", ".java", ".cpp", ".c", ".h", ".hpp", ".go", ".rs",
                       ".jsx", ".tsx", ".html", ".css", ".php", ".swift", ".cs"]
    doc_extensions = [".md", ".txt", ".rst", ".json", ".yaml", ".yml"]

    # Determine filtering mode: inclusion or exclusion
    use_inclusion_mode = bool(included_dirs) or bool(included_files)

    if use_inclusion_mode:
        # Inclusion mode: only process specified directories and files
        final_included_dirs = set(included_dirs) if included_dirs else set()
        final_included_files = set(included_files) if included_files else set()

        logger.info("Using inclusion mode")
        logger.info(f"Included directories: {list(final_included_dirs)}")
        logger.info(f"Included files: {list(final_included_files)}")

        # Convert to lists for processing
        included_dirs = list(final_included_dirs)
        included_files = list(final_included_files)
        excluded_dirs = []
        excluded_files = []
    else:
        # Exclusion mode: use default exclusions plus any additional ones
        final_excluded_dirs = set(DEFAULT_EXCLUDED_DIRS)
        final_excluded_files = set(DEFAULT_EXCLUDED_FILES)

        # Add any additional exclusions from config
        file_filters = configs["file_filters"] if "file_filters" in configs else {}
        if "excluded_dirs" in file_filters:
            final_excluded_dirs.update(file_filters["excluded_dirs"])
        if "excluded_files" in file_filters:
            final_excluded_files.update(file_filters["excluded_files"])

        # Add any explicitly provided excluded directories and files
        if excluded_dirs is not None:
            final_excluded_dirs.update(excluded_dirs)
        if excluded_files is not None:
            final_excluded_files.update(excluded_files)

        # Convert back to lists for compatibility
        excluded_dirs = list(final_excluded_dirs)
        excluded_files = list(final_excluded_files)
        included_dirs = []
        included_files = []

        logger.info("Using exclusion mode")
        logger.info(f"Excluded directories: {excluded_dirs}")
        logger.info(f"Excluded files: {excluded_files}")

    logger.info(f"Reading documents from {path}")

    def _normalize_dir_rule(rule: str) -> str:
        """Reduce a rule such as "./.venv/" to the bare directory name ".venv"."""
        cleaned = rule
        # Remove only a literal "./" prefix. str.strip("./") treats its argument as a
        # set of characters and would also eat the leading dot of hidden directories
        # (".git" -> "git"), matching the wrong directory.
        while cleaned.startswith("./"):
            cleaned = cleaned[2:]
        return cleaned.strip("/")

    def _matches_dir_rule(path_parts: List[str], dir_rules: List[str]) -> bool:
        """Return True if any normalized, non-empty rule equals one path component."""
        for rule in dir_rules:
            cleaned = _normalize_dir_rule(rule)
            # An empty rule carries no directory name; never let it match.
            if cleaned and cleaned in path_parts:
                return True
        return False

    def should_process_file(file_path: str, use_inclusion: bool, included_dirs: List[str], included_files: List[str],
                           excluded_dirs: List[str], excluded_files: List[str]) -> bool:
        """
        Determine if a file should be processed based on inclusion/exclusion rules.

        Args:
            file_path (str): The file path to check (as returned by glob under ``path``)
            use_inclusion (bool): Whether to use inclusion mode
            included_dirs (List[str]): List of directories to include
            included_files (List[str]): List of files to include
            excluded_dirs (List[str]): List of directories to exclude
            excluded_files (List[str]): List of files to exclude

        Returns:
            bool: True if the file should be processed, False otherwise
        """
        # Match rules against the path relative to the scanned root; directories
        # above the root (e.g. ".../build/<repo>") must not trigger any rule.
        path_parts = os.path.relpath(file_path, path).split(os.sep)
        file_name = os.path.basename(file_path)

        if use_inclusion:
            # Inclusion mode: file must be in included directories or match included files
            is_included = False

            if included_dirs:
                is_included = _matches_dir_rule(path_parts, included_dirs)

            # A file rule matches on exact name or on suffix (e.g. ".py")
            if not is_included and included_files:
                is_included = any(
                    file_name == included_file or file_name.endswith(included_file)
                    for included_file in included_files
                )

            # With no inclusion rules at all, every file is allowed
            if not included_dirs and not included_files:
                is_included = True

            return is_included

        # Exclusion mode: file must not be in excluded directories or match excluded files
        if _matches_dir_rule(path_parts, excluded_dirs):
            return False
        # Excluded files are compared by exact file name
        return not any(file_name == excluded_file for excluded_file in excluded_files)

    def _collect(extensions: List[str], is_code: bool, token_limit: int) -> None:
        """Read every accepted file with one of ``extensions`` and append it to ``documents``."""
        for ext in extensions:
            files = glob.glob(f"{path}/**/*{ext}", recursive=True)
            for file_path in files:
                # Check if file should be processed based on inclusion/exclusion rules
                if not should_process_file(file_path, use_inclusion_mode, included_dirs, included_files,
                                           excluded_dirs, excluded_files):
                    continue

                try:
                    with open(file_path, "r", encoding="utf-8") as f:
                        content = f.read()
                    relative_path = os.path.relpath(file_path, path)

                    # Only code files can count as implementation files; paths that
                    # look test- or app-related are flagged as non-implementation.
                    is_implementation = is_code and (
                        not relative_path.startswith("test_")
                        and not relative_path.startswith("app_")
                        and "test" not in relative_path.lower()
                    )

                    # Check token count
                    token_count = count_tokens(content, embedder_type)
                    if token_count > token_limit:
                        logger.warning(f"Skipping large file {relative_path}: Token count ({token_count}) exceeds limit")
                        continue

                    documents.append(
                        Document(
                            text=content,
                            meta_data={
                                "file_path": relative_path,
                                "type": ext[1:],
                                "is_code": is_code,
                                "is_implementation": is_implementation,
                                "title": relative_path,
                                "token_count": token_count,
                            },
                        )
                    )
                except Exception as e:
                    # Best effort: an unreadable file is logged and skipped
                    logger.error(f"Error reading {file_path}: {e}")

    # Process code files first (they get a 10x larger token allowance) ...
    _collect(code_extensions, is_code=True, token_limit=MAX_EMBEDDING_TOKENS * 10)
    # ... then documentation files
    _collect(doc_extensions, is_code=False, token_limit=MAX_EMBEDDING_TOKENS)

    logger.info(f"Found {len(documents)} documents")
    return documents

def prepare_data_pipeline(embedder_type: str = None, is_ollama_embedder: bool = None):
    """
    Build the data transformation pipeline: a text splitter chained with an
    embedding step.

    Args:
        embedder_type (str, optional): The embedder type ('openai', 'google', 'ollama').
                                     If None, will be determined from configuration.
        is_ollama_embedder (bool, optional): DEPRECATED. Use embedder_type instead.
                                           Only consulted when embedder_type is None.

    Returns:
        adal.Sequential: The data transformation pipeline
    """
    from api.config import get_embedder_config, get_embedder_type

    # Resolve the embedder type: an explicit value wins, then the deprecated
    # boolean flag, and finally whatever the configuration reports.
    if embedder_type is None:
        embedder_type = 'ollama' if is_ollama_embedder else get_embedder_type()

    splitter = TextSplitter(**configs["text_splitter"])
    embedder_config = get_embedder_config()
    embedder = get_embedder(embedder_type=embedder_type)

    if embedder_type != 'ollama':
        # Every non-Ollama embedder goes through the batching transformer
        embedder_transformer = ToEmbeddings(
            embedder=embedder,
            batch_size=embedder_config.get("batch_size", 500),
        )
    else:
        # Ollama uses the dedicated document processor instead of ToEmbeddings
        embedder_transformer = OllamaDocumentProcessor(embedder=embedder)

    # Sequential chains the splitter and the embedding step together
    return adal.Sequential(splitter, embedder_transformer)

def transform_documents_and_save_to_db(
    documents: List[Document], db_path: str, embedder_type: str = None, is_ollama_embedder: bool = None
) -> LocalDB:
    """
    Transforms a list of documents and saves them to a local database.

    Args:
        documents (list): A list of `Document` objects.
        db_path (str): The path to the local database file. Its parent directory
                       is created when it does not exist yet.
        embedder_type (str, optional): The embedder type ('openai', 'google', 'ollama').
                                     If None, will be determined from configuration.
        is_ollama_embedder (bool, optional): DEPRECATED. Use embedder_type instead.
                                           If None, will be determined from configuration.

    Returns:
        LocalDB: The database holding the loaded documents and the result of the
        "split_and_embed" transformation, after its state was saved to `db_path`.
    """
    # Get the data transformer
    data_transformer = prepare_data_pipeline(embedder_type, is_ollama_embedder)

    # Load the documents and run the transformation under a fixed key
    db = LocalDB()
    db.register_transformer(transformer=data_transformer, key="split_and_embed")
    db.load(documents)
    db.transform(key="split_and_embed")

    # A bare file name has an empty dirname, and os.makedirs("") raises
    # FileNotFoundError, so only create the directory when there is one.
    db_dir = os.path.dirname(db_path)
    if db_dir:
        os.makedirs(db_dir, exist_ok=True)
    db.save_state(filepath=db_path)
    return db

def get_github_file_content(repo_url: str, file_path: str, access_token: str = None) -> str:
    """
    Retrieves the content of a file from a GitHub repository using the GitHub API.
    Supports both public GitHub (github.com) and GitHub Enterprise (custom domains).

    Args:
        repo_url (str): The URL of the GitHub repository
                       (e.g., "https://github.com/username/repo" or "https://github.company.com/username/repo").
                       A trailing ".git" on the repository name is ignored.
        file_path (str): The path to the file within the repository (e.g., "src/main.py")
        access_token (str, optional): GitHub personal access token for private repositories

    Returns:
        str: The content of the file as a string

    Raises:
        ValueError: If the file cannot be fetched or if the URL is not a valid GitHub URL.
            Every failure is re-raised with the prefix "Failed to get file content: ".
    """
    try:
        # Parse the repository URL to support both github.com and enterprise GitHub
        parsed_url = urlparse(repo_url)
        if not parsed_url.scheme or not parsed_url.netloc:
            raise ValueError("Not a valid GitHub repository URL")

        # Check if it's a GitHub-like URL structure
        path_parts = parsed_url.path.strip('/').split('/')
        if len(path_parts) < 2:
            raise ValueError("Invalid GitHub URL format - expected format: https://domain/owner/repo")

        owner = path_parts[-2]
        repo = path_parts[-1]
        # Strip only a trailing ".git". Replacing every occurrence would corrupt
        # names that merely contain it, e.g. "user.github.io" -> "userhub.io".
        if repo.endswith(".git"):
            repo = repo[:-len(".git")]

        # Determine the API base URL
        if parsed_url.netloc == "github.com":
            # Public GitHub
            api_base = "https://api.github.com"
        else:
            # GitHub Enterprise - API is typically at https://domain/api/v3/
            api_base = f"{parsed_url.scheme}://{parsed_url.netloc}/api/v3"

        # Use GitHub API to get file content
        # The API endpoint for getting file content is: /repos/{owner}/{repo}/contents/{path}
        api_url = f"{api_base}/repos/{owner}/{repo}/contents/{file_path}"

        # Fetch file content from GitHub API
        headers = {}
        if access_token:
            headers["Authorization"] = f"token {access_token}"
        logger.info(f"Fetching file content from GitHub API: {api_url}")
        try:
            response = requests.get(api_url, headers=headers)
            response.raise_for_status()
        except RequestException as e:
            raise ValueError(f"Error fetching file content: {e}")
        try:
            content_data = response.json()
        except json.JSONDecodeError:
            raise ValueError("Invalid response from GitHub API")

        # Check if we got an error response
        if "message" in content_data and "documentation_url" in content_data:
            raise ValueError(f"GitHub API error: {content_data['message']}")

        # GitHub API returns file content as base64 encoded string
        if "content" not in content_data or "encoding" not in content_data:
            raise ValueError("File content not found in GitHub API response")
        if content_data["encoding"] != "base64":
            raise ValueError(f"Unexpected encoding: {content_data['encoding']}")

        # The content might be split into lines, so join them first
        content_base64 = content_data["content"].replace("\n", "")
        return base64.b64decode(content_base64).decode("utf-8")

    except Exception as e:
        # Single exit point for errors: callers only have to handle ValueError
        raise ValueError(f"Failed to get file content: {str(e)}")

def get_gitlab_file_content(repo_url: str, file_path: str, access_token: str = None) -> str:
    """
    Retrieves the content of a file from a GitLab repository (cloud or self-hosted).

    The project's default branch is looked up first; when that lookup fails or
    reports no branch, 'main' is used.

    Args:
        repo_url (str): The GitLab repo URL (e.g., "https://gitlab.com/username/repo" or "http://localhost/group/project").
                        A trailing ".git" on the project path is ignored.
        file_path (str): File path within the repository (e.g., "src/main.py")
        access_token (str, optional): GitLab personal access token

    Returns:
        str: File content

    Raises:
        ValueError: If anything fails. Every failure is re-raised with the prefix
            "Failed to get file content: ".
    """
    try:
        # Parse and validate the URL
        parsed_url = urlparse(repo_url)
        if not parsed_url.scheme or not parsed_url.netloc:
            raise ValueError("Not a valid GitLab repository URL")

        # netloc already carries an explicit port ("host:8080"); appending the port
        # again would produce an unusable "host:8080:8080".
        gitlab_domain = f"{parsed_url.scheme}://{parsed_url.netloc}"
        path_parts = parsed_url.path.strip("/").split("/")
        if len(path_parts) < 2:
            raise ValueError("Invalid GitLab URL format — expected something like https://gitlab.domain.com/group/project")

        # Build project path and encode for API
        project_path = "/".join(path_parts)
        # Strip only a trailing ".git"; groups or projects may legitimately contain
        # ".git" inside their names (e.g. "my.gitlab-tools").
        if project_path.endswith(".git"):
            project_path = project_path[:-len(".git")]
        encoded_project_path = quote(project_path, safe='')

        # Encode file path
        encoded_file_path = quote(file_path, safe='')

        # Try to get the default branch from the project info
        default_branch = None
        try:
            project_info_url = f"{gitlab_domain}/api/v4/projects/{encoded_project_path}"
            project_headers = {}
            if access_token:
                project_headers["PRIVATE-TOKEN"] = access_token

            project_response = requests.get(project_info_url, headers=project_headers)
            if project_response.status_code == 200:
                project_data = project_response.json()
                # "or" also covers a present-but-null default_branch, which would
                # otherwise end up as the literal "ref=None" in the request below.
                default_branch = project_data.get('default_branch') or 'main'
                logger.info(f"Found default branch: {default_branch}")
            else:
                logger.warning(f"Could not fetch project info, using 'main' as default branch")
                default_branch = 'main'
        except Exception as e:
            # Best effort: the branch lookup must not prevent the file request
            logger.warning(f"Error fetching project info: {e}, using 'main' as default branch")
            default_branch = 'main'

        api_url = f"{gitlab_domain}/api/v4/projects/{encoded_project_path}/repository/files/{encoded_file_path}/raw?ref={default_branch}"
        # Fetch file content from GitLab API
        headers = {}
        if access_token:
            headers["PRIVATE-TOKEN"] = access_token
        logger.info(f"Fetching file content from GitLab API: {api_url}")
        try:
            response = requests.get(api_url, headers=headers)
            response.raise_for_status()
            content = response.text
        except RequestException as e:
            raise ValueError(f"Error fetching file content: {e}")

        # Check for GitLab error response (JSON instead of raw file)
        # NOTE(review): a legitimate JSON file with a top-level "message" key is
        # also rejected by this heuristic — confirm whether that is intended.
        if content.startswith("{") and '"message":' in content:
            try:
                error_data = json.loads(content)
                if "message" in error_data:
                    raise ValueError(f"GitLab API error: {error_data['message']}")
            except json.JSONDecodeError:
                # Not valid JSON after all: treat it as regular file content
                pass

        return content

    except Exception as e:
        raise ValueError(f"Failed to get file content: {str(e)}")

def get_bitbucket_file_content(repo_url: str, file_path: str, access_token: str = None) -> str:
    """
    Retrieves the content of a file from a Bitbucket repository using the Bitbucket API.

    The repository's main branch is looked up first; when that lookup fails or
    reports no branch, 'main' is used.

    Args:
        repo_url (str): The URL of the Bitbucket repository (e.g., "https://bitbucket.org/username/repo").
                        A trailing ".git" on the repository name is ignored.
        file_path (str): The path to the file within the repository (e.g., "src/main.py")
        access_token (str, optional): Bitbucket personal access token for private repositories

    Returns:
        str: The content of the file as a string

    Raises:
        ValueError: If the URL is not a bitbucket.org URL or the file cannot be fetched.
            Every failure is re-raised with the prefix "Failed to get file content: ".
    """
    try:
        # Extract owner and repo name from Bitbucket URL
        if not (repo_url.startswith("https://bitbucket.org/") or repo_url.startswith("http://bitbucket.org/")):
            raise ValueError("Not a valid Bitbucket repository URL")

        parts = repo_url.rstrip('/').split('/')
        if len(parts) < 5:
            raise ValueError("Invalid Bitbucket URL format")

        owner = parts[-2]
        repo = parts[-1]
        # Strip only a trailing ".git"; replacing every occurrence would corrupt
        # names that merely contain it (e.g. "app.gitops" -> "appops").
        if repo.endswith(".git"):
            repo = repo[:-len(".git")]

        # Try to get the default branch from the repository info
        default_branch = None
        try:
            repo_info_url = f"https://api.bitbucket.org/2.0/repositories/{owner}/{repo}"
            repo_headers = {}
            if access_token:
                repo_headers["Authorization"] = f"Bearer {access_token}"

            repo_response = requests.get(repo_info_url, headers=repo_headers)
            if repo_response.status_code == 200:
                repo_data = repo_response.json()
                # Tolerate a missing or null "mainbranch" entry and a missing name
                main_branch = repo_data.get('mainbranch') or {}
                default_branch = main_branch.get('name') or 'main'
                logger.info(f"Found default branch: {default_branch}")
            else:
                logger.warning(f"Could not fetch repository info, using 'main' as default branch")
                default_branch = 'main'
        except Exception as e:
            # Best effort: the branch lookup must not prevent the file request
            logger.warning(f"Error fetching repository info: {e}, using 'main' as default branch")
            default_branch = 'main'

        # Use Bitbucket API to get file content
        # The API endpoint for getting file content is: /2.0/repositories/{owner}/{repo}/src/{branch}/{path}
        api_url = f"https://api.bitbucket.org/2.0/repositories/{owner}/{repo}/src/{default_branch}/{file_path}"

        # Fetch file content from Bitbucket API
        headers = {}
        if access_token:
            headers["Authorization"] = f"Bearer {access_token}"
        logger.info(f"Fetching file content from Bitbucket API: {api_url}")
        try:
            response = requests.get(api_url, headers=headers)
            if response.status_code == 200:
                content = response.text
            elif response.status_code == 404:
                raise ValueError("File not found on Bitbucket. Please check the file path and repository.")
            elif response.status_code == 401:
                raise ValueError("Unauthorized access to Bitbucket. Please check your access token.")
            elif response.status_code == 403:
                raise ValueError("Forbidden access to Bitbucket. You might not have permission to access this file.")
            elif response.status_code == 500:
                raise ValueError("Internal server error on Bitbucket. Please try again later.")
            else:
                # Any other error status raises here; other statuses fall through
                response.raise_for_status()
                content = response.text
            return content
        except RequestException as e:
            raise ValueError(f"Error fetching file content: {e}")

    except Exception as e:
        raise ValueError(f"Failed to get file content: {str(e)}")


def get_file_content(repo_url: str, file_path: str, repo_type: str = None, access_token: str = None) -> str:
    """
    Fetch one file from a hosted Git repository (GitHub, GitLab or Bitbucket) by
    delegating to the provider-specific fetcher selected by ``repo_type``.

    Args:
        repo_url (str): The URL of the repository
        file_path (str): The path to the file within the repository
        repo_type (str): Type of repository: "github", "gitlab" or "bitbucket"
        access_token (str, optional): Access token for private repositories

    Returns:
        str: The content of the file as a string

    Raises:
        ValueError: If the repository type is unsupported, the file cannot be
            fetched, or the URL is not valid
    """
    # Reject anything that is not one of the three supported providers up front
    if repo_type not in ("github", "gitlab", "bitbucket"):
        raise ValueError("Unsupported repository type. Only GitHub, GitLab, and Bitbucket are supported.")

    if repo_type == "github":
        fetch = get_github_file_content
    elif repo_type == "gitlab":
        fetch = get_gitlab_file_content
    else:
        fetch = get_bitbucket_file_content

    return fetch(repo_url, file_path, access_token)

class DatabaseManager:
    """
    Manages the creation, loading, transformation, and persistence of LocalDB instances.
    """

    def __init__(self):
        # The loaded or freshly built LocalDB, set by prepare_db_index
        self.db = None
        # The (whitespace-stripped) URL or local path handed to _create_repo
        self.repo_url_or_path = None
        # Dict with "save_repo_dir" and "save_db_file", set by _create_repo
        self.repo_paths = None

    def prepare_database(self, repo_url_or_path: str, repo_type: str = None, access_token: str = None,
                         embedder_type: str = None, is_ollama_embedder: bool = None,
                         excluded_dirs: List[str] = None, excluded_files: List[str] = None,
                         included_dirs: List[str] = None, included_files: List[str] = None) -> List[Document]:
        """
        Create a new database from the repository.

        Resets the manager, prepares the repository on disk (cloning it when a URL
        is given and no non-empty copy exists yet) and then builds or loads the index.

        Args:
            repo_url_or_path (str): The URL or local path of the repository
            repo_type(str): Type of repository
            access_token (str, optional): Access token for private repositories
            embedder_type (str, optional): Embedder type to use ('openai', 'google', 'ollama').
                                         If None, will be determined from configuration.
            is_ollama_embedder (bool, optional): DEPRECATED. Use embedder_type instead.
                                               If None, will be determined from configuration.
            excluded_dirs (List[str], optional): List of directories to exclude from processing
            excluded_files (List[str], optional): List of file patterns to exclude from processing
            included_dirs (List[str], optional): List of directories to include exclusively
            included_files (List[str], optional): List of file patterns to include exclusively

        Returns:
            List[Document]: List of Document objects
        """
        # Handle backward compatibility
        if embedder_type is None and is_ollama_embedder is not None:
            embedder_type = 'ollama' if is_ollama_embedder else None

        self.reset_database()
        self._create_repo(repo_url_or_path, repo_type, access_token)
        return self.prepare_db_index(embedder_type=embedder_type, excluded_dirs=excluded_dirs, excluded_files=excluded_files,
                                   included_dirs=included_dirs, included_files=included_files)

    def reset_database(self):
        """
        Reset the database to its initial state.
        """
        self.db = None
        self.repo_url_or_path = None
        self.repo_paths = None

    def _extract_repo_name_from_url(self, repo_url_or_path: str, repo_type: str) -> str:
        """
        Derive the storage name for a repository URL.

        Args:
            repo_url_or_path (str): The repository URL
            repo_type (str): Type of repository

        Returns:
            str: "{owner}_{repo}" for github/gitlab/bitbucket URLs that have at
            least an owner and a repository segment, otherwise just the last URL
            segment. A trailing ".git" is removed in both cases.
        """
        # Extract owner and repo name to create unique identifier
        url_parts = repo_url_or_path.rstrip('/').split('/')

        repo = url_parts[-1]
        # Strip only a trailing ".git". Replacing every occurrence would map
        # different repositories (e.g. "a.github.io" and "ahub.io") to one name.
        if repo.endswith(".git"):
            repo = repo[:-len(".git")]

        if repo_type in ["github", "gitlab", "bitbucket"] and len(url_parts) >= 5:
            # GitHub URL format: https://github.com/owner/repo
            # GitLab URL format: https://gitlab.com/owner/repo or https://gitlab.com/group/subgroup/repo
            # Bitbucket URL format: https://bitbucket.org/owner/repo
            owner = url_parts[-2]
            return f"{owner}_{repo}"
        return repo

    def _create_repo(self, repo_url_or_path: str, repo_type: str = None, access_token: str = None) -> None:
        """
        Download and prepare all paths.
        Paths:
        ~/.adalflow/repos/{owner}_{repo_name} (for url, local path will be the same)
        ~/.adalflow/databases/{owner}_{repo_name}.pkl

        Args:
            repo_url_or_path (str): The URL or local path of the repository
            repo_type(str): Type of repository
            access_token (str, optional): Access token for private repositories

        Raises:
            Exception: Any failure is logged and re-raised unchanged.
        """
        logger.info(f"Preparing repo storage for {repo_url_or_path}...")

        try:
            # Strip whitespace to handle URLs with leading/trailing spaces
            repo_url_or_path = repo_url_or_path.strip()

            root_path = get_adalflow_default_root_path()

            os.makedirs(root_path, exist_ok=True)
            # url
            if repo_url_or_path.startswith("https://") or repo_url_or_path.startswith("http://"):
                # Extract the repository name from the URL
                repo_name = self._extract_repo_name_from_url(repo_url_or_path, repo_type)
                logger.info(f"Extracted repo name: {repo_name}")

                save_repo_dir = os.path.join(root_path, "repos", repo_name)

                # Check if the repository directory already exists and is not empty
                if not (os.path.exists(save_repo_dir) and os.listdir(save_repo_dir)):
                    # Only download if the repository doesn't exist or is empty
                    download_repo(repo_url_or_path, save_repo_dir, repo_type, access_token)
                else:
                    logger.info(f"Repository already exists at {save_repo_dir}. Using existing repository.")
            else:  # local path
                # Normalize first: basename() of a path with a trailing separator is
                # "", which would make every such repository share ".pkl".
                repo_name = os.path.basename(os.path.normpath(repo_url_or_path))
                save_repo_dir = repo_url_or_path

            save_db_file = os.path.join(root_path, "databases", f"{repo_name}.pkl")
            os.makedirs(save_repo_dir, exist_ok=True)
            os.makedirs(os.path.dirname(save_db_file), exist_ok=True)

            self.repo_paths = {
                "save_repo_dir": save_repo_dir,
                "save_db_file": save_db_file,
            }
            self.repo_url_or_path = repo_url_or_path
            logger.info(f"Repo paths: {self.repo_paths}")

        except Exception as e:
            logger.error(f"Failed to create repository structure: {e}")
            raise

    def prepare_db_index(self, embedder_type: str = None, is_ollama_embedder: bool = None,
                        excluded_dirs: List[str] = None, excluded_files: List[str] = None,
                        included_dirs: List[str] = None, included_files: List[str] = None) -> List[Document]:
        """
        Prepare the indexed database for the repository.

        An existing database file is reused when it loads and holds at least one
        document with a non-empty embedding; otherwise the repository is read and
        embedded again. Expects _create_repo to have set self.repo_paths.

        Args:
            embedder_type (str, optional): Embedder type to use ('openai', 'google', 'ollama').
                                         If None, will be determined from configuration.
            is_ollama_embedder (bool, optional): DEPRECATED. Use embedder_type instead.
                                               If None, will be determined from configuration.
            excluded_dirs (List[str], optional): List of directories to exclude from processing
            excluded_files (List[str], optional): List of file patterns to exclude from processing
            included_dirs (List[str], optional): List of directories to include exclusively
            included_files (List[str], optional): List of file patterns to include exclusively

        Returns:
            List[Document]: List of Document objects
        """
        def _embedding_vector_length(doc: Document) -> int:
            # Size of doc.vector: the last shape dimension for array-like vectors,
            # len() for sequences, and 0 when it is missing or cannot be measured.
            vector = getattr(doc, "vector", None)
            if vector is None:
                return 0
            try:
                if hasattr(vector, "shape"):
                    if len(vector.shape) == 0:
                        return 0
                    return int(vector.shape[-1])
                if hasattr(vector, "__len__"):
                    return int(len(vector))
            except Exception:
                return 0
            return 0

        # Handle backward compatibility
        if embedder_type is None and is_ollama_embedder is not None:
            embedder_type = 'ollama' if is_ollama_embedder else None
        # check the database
        if self.repo_paths and os.path.exists(self.repo_paths["save_db_file"]):
            logger.info("Loading existing database...")
            try:
                self.db = LocalDB.load_state(self.repo_paths["save_db_file"])
                documents = self.db.get_transformed_data(key="split_and_embed")
                if documents:
                    lengths = [_embedding_vector_length(doc) for doc in documents]
                    non_empty = sum(1 for n in lengths if n > 0)
                    empty = len(lengths) - non_empty
                    sample_sizes = sorted({n for n in lengths if n > 0})[:3]
                    logger.info(
                        "Loaded %s documents from existing database (embeddings: %s non-empty, %s empty; sample_dims=%s)",
                        len(documents),
                        non_empty,
                        empty,
                        sample_sizes,
                    )

                    if non_empty == 0:
                        # Fall through to the rebuild below
                        logger.warning(
                            "Existing database contains no usable embeddings. Rebuilding embeddings..."
                        )
                    else:
                        return documents
            except Exception as e:
                logger.error(f"Error loading existing database: {e}")
                # Continue to create a new database

        # prepare the database
        logger.info("Creating new database...")
        documents = read_all_documents(
            self.repo_paths["save_repo_dir"],
            embedder_type=embedder_type,
            excluded_dirs=excluded_dirs,
            excluded_files=excluded_files,
            included_dirs=included_dirs,
            included_files=included_files
        )
        self.db = transform_documents_and_save_to_db(
            documents, self.repo_paths["save_db_file"], embedder_type=embedder_type
        )
        logger.info(f"Total documents: {len(documents)}")
        transformed_docs = self.db.get_transformed_data(key="split_and_embed")
        logger.info(f"Total transformed documents: {len(transformed_docs)}")
        return transformed_docs

    def prepare_retriever(self, repo_url_or_path: str, repo_type: str = None, access_token: str = None):
        """
        Prepare the retriever for a repository.
        This is a compatibility method for the isolated API; it forwards to
        prepare_database with default embedder and filter settings.

        Args:
            repo_url_or_path (str): The URL or local path of the repository
            repo_type(str): Type of repository
            access_token (str, optional): Access token for private repositories

        Returns:
            List[Document]: List of Document objects
        """
        return self.prepare_database(repo_url_or_path, repo_type, access_token)


================================================
FILE: api/google_embedder_client.py
================================================
"""Google AI Embeddings ModelClient integration."""

import os
import logging
import backoff
from typing import Dict, Any, Optional, List, Sequence

from adalflow.core.model_client import ModelClient
from adalflow.core.types import ModelType, EmbedderOutput

try:
    import google.generativeai as genai
    from google.generativeai.types.text_types import EmbeddingDict, BatchEmbeddingDict
except ImportError:
    raise ImportError("google-generativeai is required. Install it with 'pip install google-generativeai'")

log = logging.getLogger(__name__)


class GoogleEmbedderClient(ModelClient):
    __doc__ = r"""A component wrapper for Google AI Embeddings API client.

    This client provides access to Google's embedding models through the Google AI API.
    It supports text embeddings for various tasks including semantic similarity,
    retrieval, and classification.

    Args:
        api_key (Optional[str]): Google AI API key. Defaults to None.
            If not provided, will use the GOOGLE_API_KEY environment variable.
        env_api_key_name (str): Environment variable name for the API key.
            Defaults to "GOOGLE_API_KEY".

    Example:
        ```python
        from api.google_embedder_client import GoogleEmbedderClient
        import adalflow as adal

        client = GoogleEmbedderClient()
        embedder = adal.Embedder(
            model_client=client,
            model_kwargs={
                "model": "gemini-embedding-001",
                "task_type": "SEMANTIC_SIMILARITY"
            }
        )
        ```

    References:
        - Google AI Embeddings: https://ai.google.dev/gemini-api/docs/embeddings
        - Available models: gemini-embedding-001
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        env_api_key_name: str = "GOOGLE_API_KEY",
    ):
        """Initialize Google AI Embeddings client.

        Args:
            api_key: Google AI API key. If not provided, uses environment variable.
            env_api_key_name: Name of environment variable containing API key.

        Raises:
            ValueError: If no key is passed and the environment variable is
                unset or empty (raised by ``_initialize_client``).
        """
        super().__init__()
        self._api_key = api_key
        self._env_api_key_name = env_api_key_name
        self._initialize_client()

    def _initialize_client(self):
        """Configure the ``genai`` module with the API key.

        An explicitly supplied key wins; otherwise the environment variable
        named by ``env_api_key_name`` is consulted.

        Raises:
            ValueError: If neither source yields a non-empty key.
        """
        api_key = self._api_key or os.getenv(self._env_api_key_name)
        if not api_key:
            raise ValueError(
                f"Environment variable {self._env_api_key_name} must be set"
            )
        genai.configure(api_key=api_key)

    def parse_embedding_response(self, response) -> EmbedderOutput:
        """Parse Google AI embedding response to EmbedderOutput format.

        The payload is looked up under an ``embedding`` or ``embeddings``
        key/attribute and may be a single vector, a list of vectors, or a
        list of objects that themselves carry a vector.

        Args:
            response: Google AI embedding response (EmbeddingDict or BatchEmbeddingDict)

        Returns:
            EmbedderOutput with parsed embeddings. Any exception raised while
            parsing is caught and reported through ``error`` with empty ``data``.
        """
        try:
            from adalflow.core.types import Embedding

            embedding_data = []

            def _extract_embedding_value(obj):
                """Return the ``embedding``/``embeddings`` payload of ``obj``, or None."""
                if obj is None:
                    return None
                if isinstance(obj, dict):
                    if "embedding" in obj:
                        return obj.get("embedding")
                    if "embeddings" in obj:
                        return obj.get("embeddings")
                if hasattr(obj, "embedding"):
                    return getattr(obj, "embedding")
                if hasattr(obj, "embeddings"):
                    return getattr(obj, "embeddings")
                # Last resort: objects that can serialise themselves to a dict.
                for method_name in ("model_dump", "to_dict", "dict"):
                    if hasattr(obj, method_name):
                        try:
                            dumped = getattr(obj, method_name)()
                            if isinstance(dumped, dict):
                                if "embedding" in dumped:
                                    return dumped.get("embedding")
                                if "embeddings" in dumped:
                                    return dumped.get("embeddings")
                        except Exception:
                            # Best effort: try the next serialisation method.
                            pass
                return None

            embedding_value = _extract_embedding_value(response)
            if embedding_value is None:
                log.warning("Unexpected embedding response type/structure: %s", type(response))
                embedding_data = []
            elif isinstance(embedding_value, list) and len(embedding_value) > 0:
                if isinstance(embedding_value[0], (int, float)):
                    # A flat list of numbers is one single vector.
                    embedding_data = [Embedding(embedding=embedding_value, index=0)]
                elif isinstance(embedding_value[0], list):
                    # Batch of vectors; ``index`` is the position in the raw
                    # payload, so skipped (empty/non-list) entries leave gaps.
                    embedding_data = [
                        Embedding(embedding=emb_list, index=i)
                        for i, emb_list in enumerate(embedding_value)
                        if isinstance(emb_list, list) and len(emb_list) > 0
                    ]
                else:
                    # Batch of wrapper objects; here ``index`` is the position
                    # among the successfully extracted vectors.
                    extracted = []
                    for item in embedding_value:
                        item_emb = _extract_embedding_value(item)
                        if isinstance(item_emb, list) and len(item_emb) > 0:
                            extracted.append(item_emb)
                    embedding_data = [
                        Embedding(embedding=emb_list, index=i)
                        for i, emb_list in enumerate(extracted)
                    ]
            else:
                log.warning("Empty or invalid embedding data parsed from response")
                embedding_data = []

            if embedding_data:
                first_dim = len(embedding_data[0].embedding) if embedding_data[0].embedding is not None else 0
                log.info("Parsed %s embedding(s) (dim=%s)", len(embedding_data), first_dim)

            return EmbedderOutput(
                data=embedding_data,
                error=None,
                raw_response=response
            )
        except Exception as e:
            log.error(f"Error parsing Google AI embedding response: {e}")
            return EmbedderOutput(
                data=[],
                error=str(e),
                raw_response=response
            )

    def convert_inputs_to_api_kwargs(
        self,
        input: Optional[Any] = None,
        model_kwargs: Dict = {},
        model_type: ModelType = ModelType.UNDEFINED,
    ) -> Dict:
        """Convert inputs to Google AI API format.

        ``model_kwargs`` is copied, never mutated. A single input is stored
        under ``content``; any other number of inputs under ``contents``.

        Args:
            input: Text input(s) to embed
            model_kwargs: Model parameters including model name and task_type
            model_type: Should be ModelType.EMBEDDER for this client

        Returns:
            Dict: API kwargs for Google AI embedding call

        Raises:
            ValueError: If ``model_type`` is not ``ModelType.EMBEDDER``.
            TypeError: If ``input`` is neither a string nor a sequence.
        """
        if model_type != ModelType.EMBEDDER:
            raise ValueError(f"GoogleEmbedderClient only supports EMBEDDER model type, got {model_type}")

        # Ensure input is a list
        if isinstance(input, str):
            content = [input]
        elif isinstance(input, Sequence):
            content = list(input)
        else:
            raise TypeError("input must be a string or sequence of strings")

        final_model_kwargs = model_kwargs.copy()

        # Handle single vs batch embedding
        if len(content) == 1:
            final_model_kwargs["content"] = content[0]
        else:
            final_model_kwargs["contents"] = content

        # Set default task type if not provided
        if "task_type" not in final_model_kwargs:
            final_model_kwargs["task_type"] = "SEMANTIC_SIMILARITY"

        # Set default model if not provided
        if "model" not in final_model_kwargs:
            final_model_kwargs["model"] = "gemini-embedding-001"

        return final_model_kwargs

    @backoff.on_exception(
        backoff.expo,
        (Exception,),  # Google AI may raise various exceptions
        max_time=5,
        # Argument errors are deterministic: retrying them only delays the failure.
        giveup=lambda exc: isinstance(exc, (ValueError, TypeError)),
    )
    def call(self, api_kwargs: Dict = {}, model_type: ModelType = ModelType.UNDEFINED):
        """Call Google AI embedding API.

        Failures are retried with exponential backoff for up to 5 seconds,
        except ``ValueError``/``TypeError``, which are raised immediately.

        Args:
            api_kwargs: API parameters
            model_type: Should be ModelType.EMBEDDER

        Returns:
            Google AI embedding response

        Raises:
            ValueError: If ``model_type`` is not ``ModelType.EMBEDDER`` or if
                neither ``content`` nor ``contents`` is present in ``api_kwargs``.
        """
        if model_type != ModelType.EMBEDDER:
            raise ValueError("GoogleEmbedderClient only supports EMBEDDER model type")

        # Log everything except the text itself; only its size is recorded.
        safe_log_kwargs = {k: v for k, v in api_kwargs.items() if k not in {"content", "contents"}}
        if "content" in api_kwargs:
            safe_log_kwargs["content_chars"] = len(str(api_kwargs.get("content", "")))
        if "contents" in api_kwargs:
            try:
                contents = api_kwargs.get("contents")
                safe_log_kwargs["contents_count"] = len(contents) if hasattr(contents, "__len__") else None
            except Exception:
                safe_log_kwargs["contents_count"] = None
        log.info("Google AI Embeddings call kwargs (sanitized): %s", safe_log_kwargs)

        try:
            # Use embed_content for single text or batch embedding
            if "content" in api_kwargs:
                # Single embedding
                response = genai.embed_content(**api_kwargs)
            elif "contents" in api_kwargs:
                # Batch embedding - Google AI supports batch natively
                # Copy to avoid mutating the original dict (needed for retries)
                kwargs = api_kwargs.copy()
                contents = kwargs.pop("contents")
                response = genai.embed_content(content=contents, **kwargs)
            else:
                raise ValueError("Either 'content' or 'contents' must be provided")

            return response

        except Exception as e:
            log.error(f"Error calling Google AI Embeddings API: {e}")
            raise

    async def acall(self, api_kwargs: Dict = {}, model_type: ModelType = ModelType.UNDEFINED):
        """Async call to Google AI embedding API.

        Note: Google AI Python client doesn't have async support yet,
        so this falls back to synchronous call. The synchronous ``call``
        runs inline in this coroutine.
        """
        # Google AI client doesn't have async support yet
        return self.call(api_kwargs, model_type)

================================================
FILE: api/logging_config.py
================================================
import logging
import os
from pathlib import Path
from logging.handlers import RotatingFileHandler


class IgnoreLogChangeDetectedFilter(logging.Filter):
    """Logging filter that drops records mentioning a detected file change."""

    def filter(self, record: logging.LogRecord):
        """Return False for records whose message contains "Detected file change in"."""
        rendered_message = record.getMessage()
        if "Detected file change in" in rendered_message:
            return False
        return True


def setup_logging(format: str = None):
    """
    Configure logging for the application with log rotation.

    Environment variables:
        LOG_LEVEL: Log level (default: INFO)
        LOG_FILE_PATH: Path to log file (default: logs/application.log)
        LOG_MAX_SIZE: Max size in MB before rotating (default: 10MB)
        LOG_BACKUP_COUNT: Number of backup files to keep (default: 5)

    Ensures log directory exists, prevents path traversal, and configures
    both rotating file and console handlers. The root logger is reconfigured
    with ``force=True``, so any handlers installed earlier are replaced.

    Args:
        format: Optional log format string; a default including timestamp,
            level, logger name and source location is used when omitted.

    Raises:
        ValueError: If LOG_FILE_PATH resolves outside the ``logs`` directory
            that sits next to this module.
    """
    # Determine log directory and default file path
    base_dir = Path(__file__).parent
    log_dir = base_dir / "logs"
    log_dir.mkdir(parents=True, exist_ok=True)
    default_log_file = log_dir / "application.log"

    # Get log level from environment. The attribute lookup can hit non-level
    # names (e.g. logging.BASIC_FORMAT is a str), so only accept integers.
    log_level_str = os.environ.get("LOG_LEVEL", "INFO").upper()
    log_level = getattr(logging, log_level_str, None)
    level_is_valid = isinstance(log_level, int) and not isinstance(log_level, bool)
    if not level_is_valid:
        log_level = logging.INFO

    # Get log file path
    log_file_path = Path(os.environ.get("LOG_FILE_PATH", str(default_log_file)))

    # Secure path check: must be inside logs/ directory
    log_dir_resolved = log_dir.resolve()
    resolved_path = log_file_path.resolve()
    if not str(resolved_path).startswith(str(log_dir_resolved) + os.sep):
        raise ValueError(f"LOG_FILE_PATH '{log_file_path}' is outside the trusted log directory '{log_dir_resolved}'")

    # Ensure parent directories exist
    resolved_path.parent.mkdir(parents=True, exist_ok=True)

    # Get max log file size (default: 10MB)
    try:
        max_mb = int(os.environ.get("LOG_MAX_SIZE", 10))  # 10MB default
        max_bytes = max_mb * 1024 * 1024
    except (TypeError, ValueError):
        max_bytes = 10 * 1024 * 1024  # fallback to 10MB on error

    # Get backup count (default: 5)
    try:
        backup_count = int(os.environ.get("LOG_BACKUP_COUNT", 5))
    except (TypeError, ValueError):
        backup_count = 5

    # Configure format
    log_format = format or "%(asctime)s - %(levelname)s - %(name)s - %(filename)s:%(lineno)d - %(message)s"

    # Create handlers
    file_handler = RotatingFileHandler(resolved_path, maxBytes=max_bytes, backupCount=backup_count, encoding="utf-8")
    console_handler = logging.StreamHandler()

    # Set format for both handlers
    formatter = logging.Formatter(log_format)
    file_handler.setFormatter(formatter)
    console_handler.setFormatter(formatter)

    # Add filter to suppress "Detected file change" messages
    file_handler.addFilter(IgnoreLogChangeDetectedFilter())
    console_handler.addFilter(IgnoreLogChangeDetectedFilter())

    # Apply logging configuration
    logging.basicConfig(level=log_level, handlers=[file_handler, console_handler], force=True)

    # Log configuration info
    logger = logging.getLogger(__name__)
    if not level_is_valid:
        # Reported only now, once handlers exist to carry the message.
        logger.warning("Unrecognized LOG_LEVEL %r; falling back to INFO", log_level_str)
    logger.debug(
        f"Logging configured: level={log_level_str}, "
        f"file={resolved_path}, max_size={max_bytes} bytes, "
        f"backup_count={backup_count}"
    )


================================================
FILE: api/main.py
================================================
import os
import sys
import logging
from dotenv import load_dotenv

# Load environment variables from .env file.
# This runs before setup_logging() so that logging-related variables
# (LOG_LEVEL, LOG_FILE_PATH, ...) defined in .env are visible to it.
load_dotenv()

from api.logging_config import setup_logging

# Configure logging (root logger, rotating file + console handlers)
setup_logging()
logger = logging.getLogger(__name__)

# Configure watchfiles logger to show file paths
watchfiles_logger = logging.getLogger("watchfiles.main")
watchfiles_logger.setLevel(logging.DEBUG)  # Enable DEBUG to see file paths

# Append the parent of this file's directory (i.e. the directory that contains
# the `api` package) to sys.path so `api.*` imports can be resolved.
# NOTE(review): `api.logging_config` is already imported above, so that import
# does not depend on this entry - confirm whether this should happen earlier.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Apply watchfiles monkey patch BEFORE uvicorn import
is_development = os.environ.get("NODE_ENV") != "production"
if is_development:
    import watchfiles
    current_dir = os.path.dirname(os.path.abspath(__file__))
    logs_dir = os.path.join(current_dir, "logs")

    original_watch = watchfiles.watch

    def patched_watch(*args, **kwargs):
        """Replacement for ``watchfiles.watch`` that never watches ``logs``.

        The positional paths supplied by the caller are ignored. Instead the
        entries directly inside this file's directory are watched: every
        sub-directory except ``logs`` and every ``.py`` file. Keyword
        arguments are passed through to the original ``watch``.
        """
        def _is_watch_target(entry_name):
            entry_path = os.path.join(current_dir, entry_name)
            if os.path.isdir(entry_path):
                return entry_name != "logs"
            return os.path.isfile(entry_path) and entry_name.endswith(".py")

        watch_targets = [
            os.path.join(current_dir, entry_name)
            for entry_name in os.listdir(current_dir)
            if _is_watch_target(entry_name)
        ]
        return original_watch(*watch_targets, **kwargs)

    watchfiles.watch = patched_watch

import uvicorn

# Warn (without aborting) about API keys that are unset or empty
required_env_vars = ['GOOGLE_API_KEY', 'OPENAI_API_KEY']
missing_vars = list(filter(lambda name: not os.environ.get(name), required_env_vars))
if len(missing_vars) > 0:
    logger.warning(f"Missing environment variables: {', '.join(missing_vars)}")
    logger.warning("Some functionality may not work correctly without these variables.")

# Configure Google Generative AI
import google.generativeai as genai
from api.config import GOOGLE_API_KEY

if not GOOGLE_API_KEY:
    logger.warning("GOOGLE_API_KEY not configured")
else:
    genai.configure(api_key=GOOGLE_API_KEY)

if __name__ == "__main__":
    # Port comes from the PORT environment variable, 8001 when unset
    port = int(os.environ.get("PORT", 8001))

    # Import the app here to ensure environment variables are set first
    from api.api import app

    logger.info(f"Starting Streaming API on port {port}")

    # Auto-reload (and its exclusion patterns) only applies in development
    if is_development:
        reload_exclusions = ["**/logs/*", "**/__pycache__/*", "**/*.pyc"]
    else:
        reload_exclusions = None

    # Run the FastAPI app with uvicorn
    server_options = {
        "host": "0.0.0.0",
        "port": port,
        "reload": is_development,
        "reload_excludes": reload_exclusions,
    }
    uvicorn.run("api.api:app", **server_options)


================================================
FILE: api/ollama_patch.py
================================================
from typing import Sequence, List
from copy import deepcopy
from tqdm import tqdm
import logging
import adalflow as adal
from adalflow.core.types import Document
from adalflow.core.component import DataComponent
import requests
import os

# Configure logging
from api.logging_config import setup_logging

setup_logging()
logger = logging.getLogger(__name__)

class OllamaModelNotFoundError(Exception):
    """Raised to signal that a requested Ollama model is not available."""

def check_ollama_model_exists(model_name: str, ollama_host: str = None) -> bool:
    """
    Check if an Ollama model exists before attempting to use it.

    The installed models are read from ``GET {host}/api/tags``. Matching rules:
    - ``model_name`` with an explicit tag (e.g. ``llama3:8b``) must match an
      installed ``name`` exactly, so a different tag of the same model does
      not count as available.
    - ``model_name`` without a tag matches any installed tag of that model.

    Args:
        model_name: Name of the model to check
        ollama_host: Ollama host URL, defaults to the OLLAMA_HOST environment
            variable or http://localhost:11434. A trailing ``/`` and a
            trailing ``/api`` are tolerated.

    Returns:
        bool: True if model exists, False otherwise (including when Ollama
        cannot be reached or answers with a non-200 status)
    """
    if ollama_host is None:
        ollama_host = os.getenv("OLLAMA_HOST", "http://localhost:11434")

    try:
        # Normalise the base URL: drop trailing slashes and an optional /api
        # suffix so that exactly one "/api/tags" is appended below.
        ollama_host = ollama_host.rstrip('/')
        if ollama_host.endswith('/api'):
            ollama_host = ollama_host[:-4]

        response = requests.get(f"{ollama_host}/api/tags", timeout=5)
        if response.status_code != 200:
            logger.warning(f"Could not check Ollama models, status code: {response.status_code}")
            return False

        models_data = response.json()
        installed_names = [model.get('name', '') for model in models_data.get('models', [])]
        available_models = [name.split(':')[0] for name in installed_names]

        # Only the last path segment can carry a tag; a ':' earlier in the
        # name (e.g. a registry port) is not one.
        has_explicit_tag = ':' in model_name.rsplit('/', 1)[-1]
        if has_explicit_tag:
            is_available = model_name in installed_names
        else:
            model_base_name = model_name.split(':')[0]
            is_available = model_base_name in available_models

        if is_available:
            logger.info(f"Ollama model '{model_name}' is available")
        else:
            logger.warning(f"Ollama model '{model_name}' is not available. Available models: {available_models}")
        return is_available
    except requests.exceptions.RequestException as e:
        logger.warning(f"Could not connect to Ollama to check models: {e}")
        return False
    except Exception as e:
        logger.warning(f"Error checking Ollama model availability: {e}")
        return False

class OllamaDocumentProcessor(DataComponent):
    """
    Process documents for Ollama embeddings by processing one document at a time.
    Adalflow Ollama Client does not support batch embedding, so we need to process each document individually.

    Documents whose embedding call fails, returns no data, or yields a vector
    whose length differs from the first successful one are skipped (and
    logged); only successfully embedded copies are returned.
    """
    def __init__(self, embedder: adal.Embedder) -> None:
        super().__init__()
        self.embedder = embedder

    @staticmethod
    def _document_label(doc, index: int) -> str:
        """Return a label for log messages: the document's file path, else ``document_<index>``."""
        # meta_data may be absent *or* present but None; both must fall back to
        # the generic label instead of raising while an error is being reported.
        meta_data = getattr(doc, 'meta_data', None)
        if not isinstance(meta_data, dict):
            meta_data = {}
        return meta_data.get('file_path', f'document_{index}')

    def __call__(self, documents: Sequence[Document]) -> Sequence[Document]:
        """Embed each document individually and return the ones that succeeded.

        Args:
            documents: Documents to embed. They are deep-copied first, so the
                caller's objects are not modified.

        Returns:
            The copied documents that received a vector, in input order.
        """
        output = deepcopy(documents)
        logger.info(f"Processing {len(output)} documents individually for Ollama embeddings")

        successful_docs = []
        # Length of the first successful embedding; later ones must match it.
        expected_embedding_size = None

        for i, doc in enumerate(tqdm(output, desc="Processing documents for Ollama embeddings")):
            try:
                # Get embedding for a single document
                result = self.embedder(input=doc.text)
                if result.data and len(result.data) > 0:
                    embedding = result.data[0].embedding

                    # Validate embedding size consistency
                    if expected_embedding_size is None:
                        expected_embedding_size = len(embedding)
                        logger.info(f"Expected embedding size set to: {expected_embedding_size}")
                    elif len(embedding) != expected_embedding_size:
                        file_path = self._document_label(doc, i)
                        logger.warning(f"Document '{file_path}' has inconsistent embedding size {len(embedding)} != {expected_embedding_size}, skipping")
                        continue

                    # Assign the embedding to the document
                    output[i].vector = embedding
                    successful_docs.append(output[i])
                else:
                    file_path = self._document_label(doc, i)
                    logger.warning(f"Failed to get embedding for document '{file_path}', skipping")
            except Exception as e:
                file_path = self._document_label(doc, i)
                logger.error(f"Error processing document '{file_path}': {e}, skipping")

        logger.info(f"Successfully processed {len(successful_docs)}/{len(output)} documents with consistent embeddings")
        return successful_docs

================================================
FILE: api/openai_client.py
================================================
"""OpenAI ModelClient integration."""

import os
import base64
from typing import (
    Dict,
    Sequence,
    Optional,
    List,
    Any,
    TypeVar,
    Callable,
    Generator,
    Union,
    Literal,
)
import re

import logging
import backoff

# optional import
from adalflow.utils.lazy_import import safe_import, OptionalPackages
from openai.types.chat.chat_completion import Choice

openai = safe_import(OptionalPackages.OPENAI.value[0], OptionalPackages.OPENAI.value[1])

from openai import OpenAI, AsyncOpenAI, Stream
from openai import (
    APITimeoutError,
    InternalServerError,
    RateLimitError,
    UnprocessableEntityError,
    BadRequestError,
)
from openai.types import (
    Completion,
    CreateEmbeddingResponse,
    Image,
)
from openai.types.chat import ChatCompletionChunk, ChatCompletion, ChatCompletionMessage

from adalflow.core.model_client import ModelClient
from adalflow.core.types import (
    ModelType,
    EmbedderOutput,
    TokenLogProb,
    CompletionUsage,
    GeneratorOutput,
)
from adalflow.components.model_client.utils import parse_embedding_response

log = logging.getLogger(__name__)
T = TypeVar("T")


# Completion parsing functions; they can be combined into one single chat completion parser.
def get_first_message_content(completion: ChatCompletion) -> str:
    r"""Return the message content of the first choice.

    This is the default parser for chat completion. The raw completion is
    logged at debug level before the content is extracted."""
    log.debug(f"raw completion: {completion}")
    first_choice = completion.choices[0]
    return first_choice.message.content


# def _get_chat_completion_usage(completion: ChatCompletion) -> OpenAICompletionUsage:
#     return completion.usage


# A simple heuristic to estimate token count for estimating number of tokens in a Streaming response
def estimate_token_count(text: str) -> int:
    """
    Roughly estimate how many tokens a text contains.

    The estimate is simply the number of whitespace-separated words.

    Args:
        text (str): The text to estimate token count for.

    Returns:
        int: Estimated token count.
    """
    # str.split() with no separator splits on runs of whitespace
    return len(text.split())


def parse_stream_response(completion: ChatCompletionChunk) -> Optional[str]:
    r"""Parse the response of the stream API.

    Args:
        completion: One chunk of a streamed chat completion.

    Returns:
        The text delta carried by the first choice of the chunk, or ``None``
        when the chunk carries no text. That covers a delta whose ``content``
        is ``None`` as well as a chunk whose ``choices`` list is empty, which
        would otherwise raise ``IndexError``.
    """
    choices = completion.choices
    if not choices:
        return None
    delta = choices[0].delta
    if delta is None:
        return None
    return delta.content


def handle_streaming_response(generator: Stream[ChatCompletionChunk]):
    r"""Yield the parsed content of every chunk of a streaming response.

    Each raw chunk is logged at debug level and then passed through
    ``parse_stream_response``; the parsed value is yielded as-is."""
    for chunk in generator:
        log.debug(f"Raw chunk completion: {chunk}")
        yield parse_stream_response(chunk)


def get_all_messages_content(completion: ChatCompletion) -> List[str]:
    r"""Collect the message content of every choice (useful when n > 1)."""
    contents = []
    for choice in completion.choices:
        contents.append(choice.message.content)
    return contents


def get_probabilities(completion: ChatCompletion) -> List[List[TokenLogProb]]:
    r"""Get the probabilities of each token in the completion.

    Args:
        completion: Chat completion whose choices may carry ``logprobs``.

    Returns:
        One list per choice, in choice order, holding a ``TokenLogProb`` for
        every token entry. A choice without log-probability data (``logprobs``
        or its ``content`` is ``None``) contributes an empty list instead of
        raising.
    """
    log_probs = []
    for c in completion.choices:
        logprobs = getattr(c, "logprobs", None)
        # Both the logprobs object and its content may be None.
        content = getattr(logprobs, "content", None) or []
        # Debug logging replaces the stray print() that wrote to stdout.
        log.debug(f"logprobs content: {content}")
        log_probs_for_choice = [
            TokenLogProb(token=entry.token, logprob=entry.logprob)
            for entry in content
        ]
        log_probs.append(log_probs_for_choice)
    return log_probs


class OpenAIClient(ModelClient):
    __doc__ = r"""A component wrapper for the OpenAI API client.

    Supports both embedding and chat completion APIs, including multimodal capabilities.

    Users can:
    1. Simplify use of ``Embedder`` and ``Generator`` components by passing `OpenAIClient()` as the `model_client`.
    2. Use this as a reference to create their own API client or extend this class by copying and modifying the code.

    Note:
        We recommend avoiding `response_format` to enforce output data type or `tools` and `tool_choice` in `model_kwargs` when calling the API.
        OpenAI's internal formatting and added prompts are unknown. Instead:
        - Use :ref:`OutputParser<components-output_parsers>` for response parsing and formatting.

        For multimodal inputs, provide images in `model_kwargs["images"]` as a path, URL, or list of them.
        The model must support vision capabilities (e.g., `gpt-4o`, `gpt-4o-mini`, `o1`, `o1-mini`).

        For image generation, use `model_type=ModelType.IMAGE_GENERATION` and provide:
        - model: `"dall-e-3"` or `"dall-e-2"`
        - prompt: Text description of the image to generate
        - size: `"1024x1024"`, `"1024x1792"`, or `"1792x1024"` for DALL-E 3; `"256x256"`, `"512x512"`, or `"1024x1024"` for DALL-E 2
        - quality: `"standard"` or `"hd"` (DALL-E 3 only)
        - n: Number of images to generate (1 for DALL-E 3, 1-10 for DALL-E 2)
        - response_format: `"url"` or `"b64_json"`

    Args:
        api_key (Optional[str], optional): OpenAI API key. Defaults to `None`.
        chat_completion_parser (Callable[[Completion], Any], optional): A function to parse the chat completion into a `str`. Defaults to `None`.
            The default parser is `get_first_message_content`.
        input_type (Literal["text", "messages"]): With `"text"` the whole input is sent as a single user message.
            With `"messages"` the input is searched for `<START_OF_SYSTEM_PROMPT>`/`<END_OF_SYSTEM_PROMPT>` and
            `<START_OF_USER_PROMPT>`/`<END_OF_USER_PROMPT>` tags and split into a system and a user message. Defaults to `"text"`.
        base_url (Optional[str]): The API base URL to use when initializing the client.
            When omitted, the environment variable named by `env_base_url_name` is used, falling back to
            `"https://api.openai.com/v1"`. Can be customized for third-party API providers or self-hosted models.
        env_base_url_name (str): The environment variable name for the base URL. Defaults to `"OPENAI_BASE_URL"`.
        env_api_key_name (str): The environment variable name for the API key. Defaults to `"OPENAI_API_KEY"`.

    References:
        - OpenAI API Overview: https://platform.openai.com/docs/introduction
        - Embeddings Guide: https://platform.openai.com/docs/guides/embeddings
        - Chat Completion Models: https://platform.openai.com/docs/guides/text-generation
        - Vision Models: https://platform.openai.com/docs/guides/vision
        - Image Generation: https://platform.openai.com/docs/guides/images
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        chat_completion_parser: Callable[[Completion], Any] = None,
        input_type: Literal["text", "messages"] = "text",
        base_url: Optional[str] = None,
        env_base_url_name: str = "OPENAI_BASE_URL",
        env_api_key_name: str = "OPENAI_API_KEY",
    ):
        r"""It is recommended to set the OPENAI_API_KEY environment variable instead of passing it as an argument.

        Args:
            api_key (Optional[str], optional): OpenAI API key. Defaults to None.
            chat_completion_parser (Callable[[Completion], Any], optional): Parser applied to
                non-streaming chat completions. Defaults to `get_first_message_content`.
            input_type (Literal["text", "messages"]): How the input string is turned into chat messages.
            base_url (Optional[str]): The API base URL to use when initializing the client.
            env_base_url_name (str): The environment variable name for the base URL. Defaults to `"OPENAI_BASE_URL"`.
            env_api_key_name (str): The environment variable name for the API key. Defaults to `"OPENAI_API_KEY"`.

        Raises:
            ValueError: If no API key is passed and the environment variable is not set.
        """
        super().__init__()
        self._api_key = api_key
        self._env_api_key_name = env_api_key_name
        self._env_base_url_name = env_base_url_name
        self.base_url = base_url or os.getenv(self._env_base_url_name, "https://api.openai.com/v1")
        self.sync_client = self.init_sync_client()
        self.async_client = None  # only initialize if the async call is called
        self.chat_completion_parser = (
            chat_completion_parser or get_first_message_content
        )
        # Parser to restore after a streaming call temporarily swapped it out.
        self._non_streaming_parser = self.chat_completion_parser
        self._input_type = input_type
        self._api_kwargs = {}  # add api kwargs when the OpenAI Client is called

    def init_sync_client(self):
        """Create the synchronous ``OpenAI`` client.

        Raises:
            ValueError: If no API key was passed and the environment variable is not set.
        """
        api_key = self._api_key or os.getenv(self._env_api_key_name)
        if not api_key:
            raise ValueError(
                f"Environment variable {self._env_api_key_name} must be set"
            )
        return OpenAI(api_key=api_key, base_url=self.base_url)

    def init_async_client(self):
        """Create the asynchronous ``AsyncOpenAI`` client.

        Raises:
            ValueError: If no API key was passed and the environment variable is not set.
        """
        api_key = self._api_key or os.getenv(self._env_api_key_name)
        if not api_key:
            raise ValueError(
                f"Environment variable {self._env_api_key_name} must be set"
            )
        return AsyncOpenAI(api_key=api_key, base_url=self.base_url)

    # def _parse_chat_completion(self, completion: ChatCompletion) -> "GeneratorOutput":
    #     # TODO: raw output it is better to save the whole completion as a source of truth instead of just the message
    #     try:
    #         data = self.chat_completion_parser(completion)
    #         usage = self.track_completion_usage(completion)
    #         return GeneratorOutput(
    #             data=data, error=None, raw_response=str(data), usage=usage
    #         )
    #     except Exception as e:
    #         log.error(f"Error parsing the completion: {e}")
    #         return GeneratorOutput(data=None, error=str(e), raw_response=completion)

    def parse_chat_completion(
        self,
        completion: Union[ChatCompletion, Generator[ChatCompletionChunk, None, None]],
    ) -> "GeneratorOutput":
        """Parse the completion, and put it into the raw_response.

        The parsed result is stored in ``raw_response`` while ``data`` is left as
        ``None``. Parsing and usage-tracking failures are logged and reported
        through the ``error`` field instead of being raised.
        """
        log.debug(f"completion: {completion}, parser: {self.chat_completion_parser}")
        try:
            data = self.chat_completion_parser(completion)
        except Exception as e:
            log.error(f"Error parsing the completion: {e}")
            return GeneratorOutput(data=None, error=str(e), raw_response=completion)

        try:
            usage = self.track_completion_usage(completion)
            return GeneratorOutput(
                data=None, error=None, raw_response=data, usage=usage
            )
        except Exception as e:
            log.error(f"Error tracking the completion usage: {e}")
            return GeneratorOutput(data=None, error=str(e), raw_response=data)

    def track_completion_usage(
        self,
        completion: Union[ChatCompletion, Generator[ChatCompletionChunk, None, None]],
    ) -> CompletionUsage:
        """Extract token usage from ``completion``.

        Returns a ``CompletionUsage`` whose fields are all ``None`` (and logs an
        error) when the completion carries no readable ``usage`` attribute.
        """
        try:
            usage: CompletionUsage = CompletionUsage(
                completion_tokens=completion.usage.completion_tokens,
                prompt_tokens=completion.usage.prompt_tokens,
                total_tokens=completion.usage.total_tokens,
            )
            return usage
        except Exception as e:
            log.error(f"Error tracking the completion usage: {e}")
            return CompletionUsage(
                completion_tokens=None, prompt_tokens=None, total_tokens=None
            )

    def parse_embedding_response(
        self, response: CreateEmbeddingResponse
    ) -> EmbedderOutput:
        r"""Parse the embedding response to a structure Adalflow components can understand.

        Should be called in ``Embedder``. On failure the error is logged and an
        ``EmbedderOutput`` with empty ``data`` and the error message is returned.
        """
        try:
            return parse_embedding_response(response)
        except Exception as e:
            log.error(f"Error parsing the embedding response: {e}")
            return EmbedderOutput(data=[], error=str(e), raw_response=response)

    def _build_user_content(self, text: Any, images: Any, detail: str) -> Any:
        """Return the ``content`` of a user message: plain text, or a multimodal part list when images are given."""
        if not images:
            return text
        if isinstance(images, (str, dict)):
            images = [images]
        content = [{"type": "text", "text": text}]
        content.extend(self._prepare_image_content(img, detail) for img in images)
        return content

    def convert_inputs_to_api_kwargs(
        self,
        input: Optional[Any] = None,
        model_kwargs: Dict = {},
        model_type: ModelType = ModelType.UNDEFINED,
    ) -> Dict:
        r"""
        Specify the API input type and output api_kwargs that will be used in _call and _acall methods.
        Convert the Component's standard input, and system_input(chat model) and model_kwargs into API-specific format.
        For multimodal inputs, images can be provided in model_kwargs["images"] as a string path, URL, or list of them.
        The model specified in model_kwargs["model"] must support multimodal capabilities when using images.

        Args:
            input: The input text or messages to process
            model_kwargs: Additional parameters including:
                - images: Optional image source(s) as path, URL, or list of them
                - detail: Image detail level ('auto', 'low', or 'high'), defaults to 'auto'
                - model: The model to use (must support multimodal inputs if images are provided)
            model_type: The type of model (EMBEDDER, LLM or IMAGE_GENERATION)

        Returns:
            Dict: API-specific kwargs for the model call

        Raises:
            TypeError: If an embedder input is not a sequence of text.
            ValueError: If the model type is unsupported, or no model is given for image generation.
        """

        # Work on a copy so neither the caller's dict nor the shared default is mutated.
        final_model_kwargs = model_kwargs.copy()
        if model_type == ModelType.EMBEDDER:
            if isinstance(input, str):
                input = [input]
            # convert input to input
            if not isinstance(input, Sequence):
                raise TypeError("input must be a sequence of text")
            final_model_kwargs["input"] = input
        elif model_type == ModelType.LLM:
            # convert input to messages
            messages: List[Dict[str, str]] = []
            images = final_model_kwargs.pop("images", None)
            detail = final_model_kwargs.pop("detail", "auto")

            if self._input_type == "messages":
                system_start_tag = "<START_OF_SYSTEM_PROMPT>"
                system_end_tag = "<END_OF_SYSTEM_PROMPT>"
                user_start_tag = "<START_OF_USER_PROMPT>"
                user_end_tag = "<END_OF_USER_PROMPT>"

                # new regex pattern to ignore special characters such as \n
                pattern = (
                    rf"{system_start_tag}\s*(.*?)\s*{system_end_tag}\s*"
                    rf"{user_start_tag}\s*(.*?)\s*{user_end_tag}"
                )

                # re.DOTALL is to allow . to match newline so that (.*?) does not match in a single line
                regex = re.compile(pattern, re.DOTALL)
                match = regex.match(input)
                system_prompt, input_str = None, None

                if match:
                    system_prompt = match.group(1)
                    input_str = match.group(2)
                else:
                    # Library code should not write to stdout; report through the logger.
                    log.warning(
                        "No system/user prompt tags matched in the input; "
                        "sending it as a single user message."
                    )
                if system_prompt and input_str:
                    messages.append({"role": "system", "content": system_prompt})
                    messages.append(
                        {
                            "role": "user",
                            "content": self._build_user_content(
                                input_str, images, detail
                            ),
                        }
                    )
            if len(messages) == 0:
                # "text" input type, or the tagged format could not be used:
                # send the raw input as one user message.
                messages.append(
                    {
                        "role": "user",
                        "content": self._build_user_content(input, images, detail),
                    }
                )
            final_model_kwargs["messages"] = messages
        elif model_type == ModelType.IMAGE_GENERATION:
            # For image generation, input is the prompt
            final_model_kwargs["prompt"] = input
            # Ensure model is specified
            if "model" not in final_model_kwargs:
                raise ValueError("model must be specified for image generation")
            # Set defaults for DALL-E 3 if not specified
            final_model_kwargs.setdefault("size", "1024x1024")
            final_model_kwargs.setdefault("quality", "standard")
            final_model_kwargs.setdefault("n", 1)
            final_model_kwargs.setdefault("response_format", "url")

            # Handle image edits and variations: local files are replaced by
            # their base64 encoding.
            image = final_model_kwargs.get("image")
            if isinstance(image, str) and os.path.isfile(image):
                final_model_kwargs["image"] = self._encode_image(image)

            mask = final_model_kwargs.get("mask")
            if isinstance(mask, str) and os.path.isfile(mask):
                final_model_kwargs["mask"] = self._encode_image(mask)
        else:
            raise ValueError(f"model_type {model_type} is not supported")

        return final_model_kwargs

    def parse_image_generation_response(self, response: List[Image]) -> GeneratorOutput:
        """Parse the image generation response into a GeneratorOutput.

        ``data`` is the URL (or base64 payload) of each image; a single-image
        response is unwrapped from its list. Failures are logged and returned in
        the ``error`` field.
        """
        try:
            # Extract URLs or base64 data from the response
            data = [img.url or img.b64_json for img in response]
            # For single image responses, unwrap from list
            if len(data) == 1:
                data = data[0]
            return GeneratorOutput(
                data=data,
                raw_response=str(response),
            )
        except Exception as e:
            log.error(f"Error parsing image generation response: {e}")
            return GeneratorOutput(data=None, error=str(e), raw_response=str(response))

    @backoff.on_exception(
        backoff.expo,
        (
            APITimeoutError,
            InternalServerError,
            RateLimitError,
            UnprocessableEntityError,
            BadRequestError,
        ),
        max_time=5,
    )
    def call(self, api_kwargs: Dict = {}, model_type: ModelType = ModelType.UNDEFINED):
        """
        kwargs is the combined input and model_kwargs.  Support streaming call.

        For LLM calls with ``stream`` set, the raw stream is returned and the
        chat completion parser is switched to ``handle_streaming_response``.
        Otherwise the request is still sent in streaming mode, the chunks are
        accumulated, and a single ``ChatCompletion`` is returned; the parser that
        was active before any streaming call is restored first.

        Raises:
            ValueError: If ``model_type`` is not supported.
        """
        log.info(f"api_kwargs: {api_kwargs}")
        self._api_kwargs = api_kwargs
        if model_type == ModelType.EMBEDDER:
            return self.sync_client.embeddings.create(**api_kwargs)
        elif model_type == ModelType.LLM:
            if api_kwargs.get("stream", False):
                log.debug("streaming call")
                # Remember the regular parser so a later non-streaming call can
                # restore it instead of parsing with the streaming handler.
                if self.chat_completion_parser is not handle_streaming_response:
                    self._non_streaming_parser = self.chat_completion_parser
                self.chat_completion_parser = handle_streaming_response
                return self.sync_client.chat.completions.create(**api_kwargs)
            else:
                log.debug("non-streaming call converted to streaming")
                # A previous streaming call may have left the streaming handler
                # installed; it cannot parse the ChatCompletion built below.
                if self.chat_completion_parser is handle_streaming_response:
                    self.chat_completion_parser = getattr(
                        self, "_non_streaming_parser", get_first_message_content
                    )
                # Make a copy of api_kwargs to avoid modifying the original
                streaming_kwargs = api_kwargs.copy()
                streaming_kwargs["stream"] = True

                # Get streaming response
                stream_response = self.sync_client.chat.completions.create(**streaming_kwargs)

                # Accumulate all content from the stream
                pieces = []
                completion_id = ""
                model = ""
                created = 0
                for chunk in stream_response:
                    completion_id = getattr(chunk, "id", None) or completion_id
                    model = getattr(chunk, "model", None) or model
                    created = getattr(chunk, "created", 0) or created
                    choices = getattr(chunk, "choices", None) or []
                    if choices:
                        delta = getattr(choices[0], "delta", None)
                        text = getattr(delta, "content", None)
                        if text:
                            pieces.append(text)
                # Return the mock completion object that will be processed by the chat_completion_parser
                return ChatCompletion(
                    id=completion_id,
                    model=model,
                    created=created,
                    object="chat.completion",
                    choices=[Choice(
                        index=0,
                        finish_reason="stop",
                        message=ChatCompletionMessage(content="".join(pieces), role="assistant")
                    )]
                )
        elif model_type == ModelType.IMAGE_GENERATION:
            # Determine which image API to call based on the presence of image/mask
            if "image" in api_kwargs:
                if "mask" in api_kwargs:
                    # Image edit
                    response = self.sync_client.images.edit(**api_kwargs)
                else:
                    # Image variation
                    response = self.sync_client.images.create_variation(**api_kwargs)
            else:
                # Image generation
                response = self.sync_client.images.generate(**api_kwargs)
            return response.data
        else:
            raise ValueError(f"model_type {model_type} is not supported")

    @backoff.on_exception(
        backoff.expo,
        (
            APITimeoutError,
            InternalServerError,
            RateLimitError,
            UnprocessableEntityError,
            BadRequestError,
        ),
        max_time=5,
    )
    async def acall(
        self, api_kwargs: Dict = {}, model_type: ModelType = ModelType.UNDEFINED
    ):
        """
        kwargs is the combined input and model_kwargs

        The async client is created on first use. The API response is returned
        as-is (for image requests, its ``data`` attribute).

        Raises:
            ValueError: If ``model_type`` is not supported.
        """
        # store the api kwargs in the client
        self._api_kwargs = api_kwargs
        if self.async_client is None:
            self.async_client = self.init_async_client()
        if model_type == ModelType.EMBEDDER:
            return await self.async_client.embeddings.create(**api_kwargs)
        elif model_type == ModelType.LLM:
            return await self.async_client.chat.completions.create(**api_kwargs)
        elif model_type == ModelType.IMAGE_GENERATION:
            # Determine which image API to call based on the presence of image/mask
            if "image" in api_kwargs:
                if "mask" in api_kwargs:
                    # Image edit
                    response = await self.async_client.images.edit(**api_kwargs)
                else:
                    # Image variation
                    response = await self.async_client.images.create_variation(
                        **api_kwargs
                    )
            else:
                # Image generation
                response = await self.async_client.images.generate(**api_kwargs)
            return response.data
        else:
            raise ValueError(f"model_type {model_type} is not supported")

    @classmethod
    def from_dict(cls: type[T], data: Dict[str, Any]) -> T:
        """Rebuild a client from ``to_dict`` output and recreate both API clients."""
        obj = super().from_dict(data)
        # recreate the existing clients
        obj.sync_client = obj.init_sync_client()
        obj.async_client = obj.init_async_client()
        return obj

    def to_dict(self) -> Dict[str, Any]:
        r"""Convert the component to a dictionary."""
        # TODO: not exclude but save yes or no for recreating the clients
        exclude = [
            "sync_client",
            "async_client",
        ]  # unserializable object
        output = super().to_dict(exclude=exclude)
        return output

    def _encode_image(self, image_path: str) -> str:
        """Encode image to base64 string.

        Args:
            image_path: Path to image file.

        Returns:
            Base64 encoded image string.

        Raises:
            ValueError: If the file cannot be read or doesn't exist.
        """
        try:
            with open(image_path, "rb") as image_file:
                return base64.b64encode(image_file.read()).decode("utf-8")
        except FileNotFoundError:
            raise ValueError(f"Image file not found: {image_path}")
        except PermissionError:
            raise ValueError(f"Permission denied when reading image file: {image_path}")
        except Exception as e:
            raise ValueError(f"Error encoding image {image_path}: {str(e)}")

    def _prepare_image_content(
        self, image_source: Union[str, Dict[str, Any]], detail: str = "auto"
    ) -> Dict[str, Any]:
        """Prepare image content for API request.

        Args:
            image_source: A path to a local image, an ``http(s)`` URL, a ``data:``
                URL, or an already formatted content dict (returned unchanged).
            detail: Image detail level ('auto', 'low', or 'high').

        Returns:
            Formatted image content for API request.

        Raises:
            ValueError: If a local image file cannot be read.
        """
        if isinstance(image_source, str):
            # Remote URLs and ready-made data URLs are passed through untouched.
            if image_source.startswith(("http://", "https://", "data:")):
                return {
                    "type": "image_url",
                    "image_url": {"url": image_source, "detail": detail},
                }

            import mimetypes

            base64_image = self._encode_image(image_source)
            # Label the payload with the MIME type implied by the file
            # extension; keep the previous "image/jpeg" label as the fallback.
            guessed_type, _ = mimetypes.guess_type(image_source)
            if guessed_type and guessed_type.startswith("image/"):
                mime_type = guessed_type
            else:
                mime_type = "image/jpeg"
            return {
                "type": "image_url",
                "image_url": {
                    "url": f"data:{mime_type};base64,{base64_image}",
                    "detail": detail,
                },
            }
        return image_source


# Example usage:
if __name__ == "__main__":
    import adalflow as adal
    from adalflow.core import Generator
    from adalflow.utils import setup_env

    # log = get_logger(level="DEBUG")

    # setup env or pass the api_key
    setup_env()

    # Demo 1: Generator with streaming explicitly disabled.
    prompt_kwargs = {"input_str": "What is the meaning of life?"}

    gen = Generator(
        model_client=OpenAIClient(),
        model_kwargs={"model": "gpt-4o", "stream": False},
    )
    gen_response = gen(prompt_kwargs)
    print(f"gen_response: {gen_response}")

    # for genout in gen_response.data:
    #     print(f"genout: {genout}")

    # test that to_dict and from_dict works
    # model_client = OpenAIClient()
    # model_client_dict = model_client.to_dict()
    # from_dict_model_client = OpenAIClient.from_dict(model_client_dict)
    # assert model_client_dict == from_dict_model_client.to_dict()

    # Demo 2: Generator with default model_kwargs.
    openai_llm = adal.Generator(
        model_client=OpenAIClient(), model_kwargs={"model": "gpt-4o"}
    )
    response = openai_llm(prompt_kwargs={"input_str": "What is LLM?"})
    print(response)


================================================
FILE: api/openrouter_client.py
================================================
"""OpenRouter ModelClient integration."""

from typing import Dict, Sequence, Optional, Any, List
import logging
import json
import aiohttp
import requests
from requests.exceptions import RequestException, Timeout

from adalflow.core.model_client import ModelClient
from adalflow.core.types import (
    CompletionUsage,
    ModelType,
    GeneratorOutput,
)

log = logging.getLogger(__name__)

class OpenRouterClient(ModelClient):
    __doc__ = r"""A component wrapper for the OpenRouter API client.

    OpenRouter provides a unified API that gives access to hundreds of AI models through a single endpoint.
    The API is compatible with OpenAI's API format with a few small differences.

    Visit https://openrouter.ai/docs for more details.

    Example:
        ```python
        from api.openrouter_client import OpenRouterClient

        client = OpenRouterClient()
        generator = adal.Generator(
            model_client=client,
            model_kwargs={"model": "openai/gpt-4o"}
        )
        ```
    """

    def __init__(self, *args, **kwargs) -> None:
        """Set up the base client and the OpenRouter connection settings.

        All positional and keyword arguments are forwarded to the parent
        initializer. The synchronous settings are built immediately; the
        asynchronous ones stay ``None`` until they are first needed.
        """
        super().__init__(*args, **kwargs)
        # Deferred: the async settings are only created on demand.
        self.async_client = None
        self.sync_client = self.init_sync_client()

    def init_sync_client(self):
        """Build the settings used for synchronous OpenRouter requests.

        Returns a dict with the configured API key and the OpenRouter base
        URL. A missing key is only logged as a warning, not raised.
        """
        from api.config import OPENROUTER_API_KEY

        if not OPENROUTER_API_KEY:
            log.warning("OPENROUTER_API_KEY not configured")

        # There is no dedicated OpenRouter client library, so requests are
        # made directly with these settings.
        settings = dict(
            api_key=OPENROUTER_API_KEY,
            base_url="https://openrouter.ai/api/v1",
        )
        return settings

    def init_async_client(self):
        """Build the settings used for asynchronous OpenRouter requests.

        Returns a dict with the configured API key and the OpenRouter base
        URL. A missing key is only logged as a warning, not raised.
        """
        from api.config import OPENROUTER_API_KEY

        if not OPENROUTER_API_KEY:
            log.warning("OPENROUTER_API_KEY not configured")

        # Async requests go through aiohttp using these settings.
        settings = dict(
            api_key=OPENROUTER_API_KEY,
            base_url="https://openrouter.ai/api/v1",
        )
        return settings

    def convert_inputs_to_api_kwargs(
        self, input: Any, model_kwargs: Dict = None, model_type: ModelType = None
    ) -> Dict:
        """Convert AdalFlow inputs to OpenRouter API format.

        Args:
            input: Either a prompt string (sent as a single user message) or a
                list of message dicts that is used as-is.
            model_kwargs: Extra request parameters merged into the result. When
                no ``model`` is given, ``"openai/gpt-3.5-turbo"`` is used.
            model_type: The kind of model being called; only ``ModelType.LLM``
                is supported.

        Returns:
            Dict: The request body containing ``messages`` plus ``model_kwargs``.

        Raises:
            NotImplementedError: For ``ModelType.EMBEDDER`` (embeddings are not supported).
            ValueError: For any other model type or an unsupported input format.
        """
        model_kwargs = model_kwargs or {}

        # ``ModelType`` names its embedding member EMBEDDER; the previous
        # ``ModelType.EMBEDDING`` lookup raised AttributeError for every
        # non-LLM model type before either intended error could be raised.
        if model_type == ModelType.EMBEDDER:
            # OpenRouter doesn't support embeddings directly
            # We could potentially use a specific model through OpenRouter for embeddings
            # but for now, we'll raise an error
            raise NotImplementedError("OpenRouter client does not support embeddings yet")

        if model_type != ModelType.LLM:
            raise ValueError(f"Unsupported model type: {model_type}")

        # Convert input to messages format if it's a string
        if isinstance(input, str):
            messages = [{"role": "user", "content": input}]
        elif isinstance(input, list) and all(isinstance(msg, dict) for msg in input):
            messages = input
        else:
            raise ValueError(f"Unsupported input format for OpenRouter: {type(input)}")

        # For debugging
        log.info(f"Messages for OpenRouter: {messages}")

        api_kwargs = {
            "messages": messages,
            **model_kwargs
        }

        # Ensure model is specified
        if "model" not in api_kwargs:
            api_kwargs["model"] = "openai/gpt-3.5-turbo"

        return api_kwargs

    async def acall(self, api_kwargs: Dict = None, model_type: ModelType = None) -> Any:
        """Make an asynchronous call to the OpenRouter API."""
        if not self.async_client:
            self.async_client = self.init_async_client()

        # Check if API key is set
        if not self.async_client.get("api_key"):
            error_msg = "OPENROUTER_API_KEY not configured. Please set this environment variable to use OpenRouter."
            log.error(error_msg)
            # Instead of raising an exception, return a generator that yields the error message
            # This allows the error to be displayed to the user in the streaming response
            async def error_generator():
                yield error_msg
            return error_generator()

        api_kwargs = api_kwargs or {}

        if model_type == ModelType.LLM:
            # Prepare headers
            headers = {
                "Authorization": f"Bearer {self.async_client['api_key']}",
                "Content-Type": "application/json",
                "HTTP-Referer": "https://github.com/AsyncFuncAI/deepwiki-open",  # Optional
                "X-Title": "DeepWiki"  # Optional
            }

            # Always use non-streaming mode for OpenRouter
            api_kwargs["stream"] = False

            # Make the API call
            try:
                log.info(f"Making async OpenRouter API call to {self.async_client['base_url']}/chat/completions")
                log.info(f"Request headers: {headers}")
                log.info(f"Request body: {api_kwargs}")

                async with aiohttp.ClientSession() as session:
                    try:
                        async with session.post(
                            f"{self.async_client['base_url']}/chat/completions",
                            headers=headers,
                            json=api_kwargs,
                            timeout=60
                        ) as response:
                            if response.status != 200:
                                error_text = await response.text()
                                log.error(f"OpenRouter API error ({response.status}): {error_text}")

                                # Return a generator that yields the error message
                                async def error_response_generator():
                                    yield f"OpenRouter API error ({response.status}): {error_text}"
                                return error_response_generator()

                            # Get the full response
                            data = await response.json()
                            log.info(f"Received response from OpenRouter: {data}")

                            # Create a generator that yields the content
                            async def content_generator():
                                if "choices" in data and len(data["choices"]) > 0:
                                    choice = data["choices"][0]
                                    if "message" in choice and "content" in choice["message"]:
                                        content = choice["message"]["content"]
                                        log.info("Successfully retrieved response")

                                        # Check if the content is XML and ensure it's properly formatted
                                        if content.strip().startswith("<") and ">" in content:
                                            # It's likely XML, let's make sure it's properly formatted
                                            try:
                                                # Extract the XML content
                                                xml_content = content

                                                # Check if it's a wiki_structure XML
                                                if "<wiki_structure>" in xml_content:
                                                    log.info("Found wiki_structure XML, ensuring proper format")

                                                    # Extract just the wiki_structure XML
                                                    import re
                                                    wiki_match = re.search(r'<wiki_structure>[\s\S]*?<\/wiki_structure>', xml_content)
                                                    if wiki_match:
                                                        # Get the raw XML
                                                        raw_xml = wiki_match.group(0)

                                                        # Clean the XML by removing any leading/trailing whitespace
                                                        # and ensuring it's properly formatted
                                                        clean_xml = raw_xml.strip()

                                                        # Try to fix common XML issues
                                                        try:
                                                            # Replace problematic characters in XML
                                                            fixed_xml = clean_xml

                                                            # Replace & with &amp; if not already part of an entity
                                                            fixed_xml = re.sub(r'&(?!amp;|lt;|gt;|apos;|quot;)', '&amp;', fixed_xml)

                                                            # Fix other common XML issues
                                                            fixed_xml = fixed_xml.replace('</', '</').replace('  >', '>')

                                                            # Try to parse the fixed XML
                                                            from xml.dom.minidom import parseString
                                                            dom = parseString(fixed_xml)

                                                            # Get the pretty-printed XML with proper indentation
                                                            pretty_xml = dom.toprettyxml()

                                                            # Remove XML declaration
                                                            if pretty_xml.startswith('<?xml'):
                                                                pretty_xml = pretty_xml[pretty_xml.find('?>')+2:].strip()

                                                            log.info(f"Extracted and validated XML: {pretty_xml[:100]}...")
                                                            yield pretty_xml
                                                        except Exception as xml_parse_error:
                                                            log.warning(f"XML validation failed: {str(xml_parse_error)}, using raw XML")

                                                            # If XML validation fails, try a more aggressive approach
                                                            try:
                                                                # Use regex to extract just the structure without any problematic characters
                                                                import re

                                                                # Extract the basic structure
                                                                structure_match = re.search(r'<wiki_structure>(.*?)</wiki_structure>', clean_xml, re.DOTALL)
                                                                if structure_match:
                                                                    structure = structure_match.group(1).strip()

                                                                    # Rebuild a clean XML structure
                                                                    clean_structure = "<wiki_structure>\n"

                                                                    # Extract title
                                                                    title_match = re.search(r'<title>(.*?)</title>', structure, re.DOTALL)
                                                                    if title_match:
                                                                        title = title_match.group(1).strip()
                                                                        clean_structure += f"  <title>{title}</title>\n"

                                                                    # Extract description
                                                                    desc_match = re.search(r'<description>(.*?)</description>', structure, re.DOTALL)
                                                                    if desc_match:
                                                                        desc = desc_match.group(1).strip()
                                                                        clean_structure += f"  <description>{desc}</description>\n"

                                                                    # Add pages section
                                                                    clean_structure += "  <pages>\n"

                                                                    # Extract pages
                                                                    pages = re.findall(r'<page id="(.*?)">(.*?)</page>', structure, re.DOTALL)
                                                                    for page_id, page_content in pages:
                                                                        clean_structure += f'    <page id="{page_id}">\n'

                                                                        # Extract page title
                                                                        page_title_match = re.search(r'<title>(.*?)</title>', page_content, re.DOTALL)
                                                                        if page_title_match:
                                                                            page_title = page_title_match.group(1).strip()
                                                                            clean_structure += f"      <title>{page_title}</title>\n"

                                                                        # Extract page description
                                                                        page_desc_match = re.search(r'<description>(.*?)</description>', page_content, re.DOTALL)
                                                                        if page_desc_match:
                                                                            page_desc = page_desc_match.group(1).strip()
                                                                            clean_structure += f"      <description>{page_desc}</description>\n"

                                                                        # Extract importance
                                                                        importance_match = re.search(r'<importance>(.*?)</importance>', page_content, re.DOTALL)
                                                                        if importance_match:
                                                                            importance = importance_match.group(1).strip()
                                                                            clean_structure += f"      <importance>{importance}</importance>\n"

                                                                        # Extract relevant files
                                                                        clean_structure += "      <relevant_files>\n"
                                                                        file_paths = re.findall(r'<file_path>(.*?)</file_path>', page_content, re.DOTALL)
                                                                        for file_path in file_paths:
                                                                            clean_structure += f"        <file_path>{file_path.strip()}</file_path>\n"
                                                                        clean_structure += "      </relevant_files>\n"

                                                                        # Extract related pages
                                                                        clean_structure += "      <related_pages>\n"
                                                                        related_pages = re.findall(r'<related>(.*?)</related>', page_content, re.DOTALL)
                                                                        for related in related_pages:
                                                                            clean_structure += f"        <related>{related.strip()}</related>\n"
                                                                        clean_structure += "      </related_pages>\n"

                                                                        clean_structure += "    </page>\n"

                                                                    clean_structure += "  </pages>\n</wiki_structure>"

                                                                    log.info("Successfully rebuilt clean XML structure")
                                                                    yield clean_structure
                                                                else:
                                                                    log.warning("Could not extract wiki structure, using raw XML")
                                                                    yield clean_xml
                                                            except Exception as rebuild_error:
                                                                log.warning(f"Failed to rebuild XML: {str(rebuild_error)}, using raw XML")
                                                                yield clean_xml
                                                    else:
                                                        # If we can't extract it, just yield the original content
                                                        log.warning("Could not extract wiki_structure XML, yielding original content")
                                                        yield xml_content
                                                else:
                                                    # For other XML content, just yield it as is
                                                    yield content
                                            except Exception as xml_error:
                                                log.error(f"Error processing XML content: {str(xml_error)}")
                                                yield content
                                        else:
                                            # Not XML, just yield the content
                                            yield content
                                    else:
                                        log.error(f"Unexpected response format: {data}")
                                        yield "Error: Unexpected response format from OpenRouter API"
                                else:
                                    log.error(f"No choices in response: {data}")
                                    yield "Error: No response content from OpenRouter API"

                            return content_generator()
                    except aiohttp.ClientError as e:
                        e_client = e
                        log.error(f"Connection error with OpenRouter API: {str(e_client)}")

                        # Return a generator that yields the error message
                        async def connection_error_generator():
                            yield f"Connection error with OpenRouter API: {str(e_client)}. Please check your internet connection and that the OpenRouter API is accessible."
                        return connection_error_generator()

            except RequestException as e:
                e_req = e
                log.error(f"Error calling OpenRouter API asynchronously: {str(e_req)}")

                # Return a generator that yields the error message
                async def request_error_generator():
                    yield f"Error calling OpenRouter API: {str(e_req)}"
                return request_error_generator()

            except Exception as e:
                e_unexp = e
                log.error(f"Unexpected error calling OpenRouter API asynchronously: {str(e_unexp)}")

                # Return a generator that yields the error message
                async def unexpected_error_generator():
                    yield f"Unexpected error calling OpenRouter API: {str(e_unexp)}"
                return unexpected_error_generator()

        else:
            error_msg = f"Unsupported model type: {model_type}"
            log.error(error_msg)

            # Return a generator that yields the error message
            async def model_type_error_generator():
                yield error_msg
            return model_type_error_generator()

    def _process_completion_response(self, data: Dict) -> GeneratorOutput:
        """Convert a non-streaming OpenRouter completion payload into a GeneratorOutput.

        Args:
            data: Decoded JSON body of the completion response.

        Returns:
            GeneratorOutput whose ``data`` is the text of the first choice, whose
            ``usage`` is a CompletionUsage built from the payload's "usage" entry
            (``None`` when the payload has no such entry), and whose
            ``raw_response`` is the payload itself.

        Raises:
            ValueError: If the payload has no choices, or the first choice has
                neither a "message" nor a "text" key. Any exception raised here
                is logged and then re-raised unchanged.
        """
        try:
            choices = data.get("choices")
            if not choices:
                raise ValueError(f"No choices in OpenRouter response: {data}")

            first_choice = choices[0]

            # The text is read from message.content when a "message" key is
            # present, otherwise from "text"; anything else is rejected.
            has_message = "message" in first_choice
            if not has_message and "text" not in first_choice:
                raise ValueError(f"Unexpected response format from OpenRouter: {first_choice}")

            if has_message:
                text = first_choice["message"].get("content", "")
            else:
                text = first_choice.get("text", "")

            # Token accounting is optional; missing counters default to 0.
            token_usage = None
            if "usage" in data:
                reported = data["usage"]
                token_usage = CompletionUsage(
                    **{
                        field: reported.get(field, 0)
                        for field in ("prompt_tokens", "completion_tokens", "total_tokens")
                    }
                )

            return GeneratorOutput(data=text, usage=token_usage, raw_response=data)

        except Exception as e_proc:
            log.error(f"Error processing OpenRouter completion response: {str(e_proc)}")
            raise

    def _process_streaming_response(self, response):
        """Process a streaming response from OpenRouter.

        The body is read as a Server-Sent Events stream: complete lines are taken
        from a running buffer, blank lines and comment lines (starting with ":")
        are skipped, and the payload of every "data:" line is parsed as JSON.
        A trailing line that never receives its newline is discarded.

        Args:
            response: Streaming HTTP response exposing ``iter_content``
                (presumably a ``requests.Response`` opened with ``stream=True`` -
                confirm against the caller).

        Yields:
            The ``choices[0].delta.content`` value of each event when it is
            non-empty, otherwise ``choices[0].text`` when that key is present.
            Failures are not raised: an error-message string is yielded instead.
        """
        # Local import so this method does not depend on a file-level import.
        import codecs

        try:
            log.info("Starting to process streaming response from OpenRouter")
            buffer = ""

            # Event streams are UTF-8. Decoding the raw bytes incrementally keeps
            # a multi-byte character that is split across two chunks intact and
            # avoids depending on the encoding guessed from the response headers.
            decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")

            for chunk in response.iter_content(chunk_size=1024):
                try:
                    # Add chunk to buffer (text chunks pass through unchanged)
                    if isinstance(chunk, bytes):
                        chunk = decoder.decode(chunk)
                    buffer += chunk

                    # Process complete lines in the buffer
                    while '\n' in buffer:
                        line, buffer = buffer.split('\n', 1)
                        line = line.strip()

                        if not line:
                            continue

                        log.debug(f"Processing line: {line}")

                        # Skip SSE comments (lines starting with :)
                        if line.startswith(':'):
                            log.debug(f"Skipping SSE comment: {line}")
                            continue

                        if line.startswith("data:"):
                            # The space after "data:" is optional in SSE
                            data = line[5:].lstrip()

                            # Check for stream end
                            if data == "[DONE]":
                                log.info("Received [DONE] marker")
                                # Finish the generator; a bare `break` would only
                                # leave the line loop and keep reading chunks.
                                return

                            try:
                                data_obj = json.loads(data)
                                log.debug(f"Parsed JSON data: {data_obj}")

                                # Extract content from delta
                                if "choices" in data_obj and len(data_obj["choices"]) > 0:
                                    choice = data_obj["choices"][0]

                                    if "delta" in choice and "content" in choice["delta"] and choice["delta"]["content"]:
                                        content = choice["delta"]["content"]
                                        log.debug(f"Yielding delta content: {content}")
                                        yield content
                                    elif "text" in choice:
                                        log.debug(f"Yielding text content: {choice['text']}")
                                        yield choice["text"]
                                    else:
                                        log.debug(f"No content found in choice: {choice}")
                                else:
                                    log.debug(f"No choices found in data: {data_obj}")

                            except json.JSONDecodeError:
                                log.warning(f"Failed to parse SSE data: {data}")
                                continue
                except Exception as e_chunk:
                    # Best effort: report the problem in-band and keep reading.
                    log.error(f"Error processing streaming chunk: {str(e_chunk)}")
                    yield f"Error processing response chunk: {str(e_chunk)}"
        except Exception as e_stream:
            log.error(f"Error in streaming response: {str(e_stream)}")
            yield f"Error in streaming response: {str(e_stream)}"

    async def _process_async_streaming_response(self, response):
        """Process an asynchronous streaming response from OpenRouter.

        The body is read as a Server-Sent Events stream: complete lines are taken
        from a running buffer, blank lines and comment lines (starting with ":")
        are skipped, and the payload of every "data:" line is parsed as JSON.
        A trailing line that never receives its newline is discarded.

        Args:
            response: Response object whose ``content`` attribute can be consumed
                with ``async for`` (presumably an ``aiohttp.ClientResponse`` -
                confirm against the caller).

        Yields:
            The ``choices[0].delta.content`` value of each event when it is
            non-empty, otherwise ``choices[0].text`` when that key is present.
            Failures are not raised: an error-message string is yielded instead.
        """
        # Local import so this method does not depend on a file-level import.
        import codecs

        buffer = ""
        # Decode incrementally so a multi-byte UTF-8 character split across two
        # chunks is reassembled instead of failing a per-chunk strict decode.
        decoder = codecs.getincrementaldecoder("utf-8")(errors="replace")
        try:
            log.info("Starting to process async streaming response from OpenRouter")
            async for chunk in response.content:
                try:
                    # Convert bytes to string and add to buffer
                    if isinstance(chunk, bytes):
                        chunk_str = decoder.decode(chunk)
                    else:
                        chunk_str = str(chunk)

                    buffer += chunk_str

                    # Process complete lines in the buffer
                    while '\n' in buffer:
                        line, buffer = buffer.split('\n', 1)
                        line = line.strip()

                        if not line:
                            continue

                        log.debug(f"Processing line: {line}")

                        # Skip SSE comments (lines starting with :)
                        if line.startswith(':'):
                            log.debug(f"Skipping SSE comment: {line}")
                            continue

                        if line.startswith("data:"):
                            # The space after "data:" is optional in SSE
                            data = line[5:].lstrip()

                            # Check for stream end
                            if data == "[DONE]":
                                log.info("Received [DONE] marker")
                                # Finish the generator; a bare `break` would only
                                # leave the line loop and keep reading chunks.
                                return

                            try:
                                data_obj = json.loads(data)
                                log.debug(f"Parsed JSON data: {data_obj}")

                                # Extract content from delta
                                if "choices" in data_obj and len(data_obj["choices"]) > 0:
                                    choice = data_obj["choices"][0]

                                    if "delta" in choice and "content" in choice["delta"] and choice["delta"]["content"]:
                                        content = choice["delta"]["content"]
                                        log.debug(f"Yielding delta content: {content}")
                                        yield content
                                    elif "text" in choice:
                                        log.debug(f"Yielding text content: {choice['text']}")
                                        yield choice["text"]
                                    else:
                                        log.debug(f"No content found in choice: {choice}")
                                else:
                                    log.debug(f"No choices found in data: {data_obj}")

                            except json.JSONDecodeError:
                                log.warning(f"Failed to parse SSE data: {data}")
                                continue
                except Exception as e_chunk:
                    # Best effort: report the problem in-band and keep reading.
                    log.error(f"Error processing streaming chunk: {str(e_chunk)}")
                    yield f"Error processing response chunk: {str(e_chunk)}"
        except Exception as e_stream:
            log.error(f"Error in async streaming response: {str(e_stream)}")
            yield f"Error in streaming response: {str(e_stream)}"


================================================
FILE: api/prompts.py
================================================
"""Module containing all prompts used in the DeepWiki project."""

# System prompt for RAG
#
# Static text (no placeholders). It sets the assistant's role (answering
# questions about a GitHub repository), the response-language rules, and the
# markdown formatting rules, including the ban on wrapping the whole answer in
# ```markdown fences.
RAG_SYSTEM_PROMPT = r"""
You are a code assistant which answers user questions on a Github Repo.
You will receive user query, relevant context, and past conversation history.

LANGUAGE DETECTION AND RESPONSE:
- Detect the language of the user's query
- Respond in the SAME language as the user's query
- IMPORTANT:If a specific language is requested in the prompt, prioritize that language over the query language

FORMAT YOUR RESPONSE USING MARKDOWN:
- Use proper markdown syntax for all formatting
- For code blocks, use triple backticks with language specification (```python, ```javascript, etc.)
- Use ## headings for major sections
- Use bullet points or numbered lists where appropriate
- Format tables using markdown table syntax when presenting structured data
- Use **bold** and *italic* for emphasis
- When referencing file paths, use `inline code` formatting

IMPORTANT FORMATTING RULES:
1. DO NOT include ```markdown fences at the beginning or end of your answer
2. Start your response directly with the content
3. The content will already be rendered as markdown, so just provide the raw markdown content

Think step by step and ensure your answer is well-structured and visually organized.
"""

# Template for RAG
#
# Jinja2-syntax template ({% ... %} blocks, {{ ... }} expressions, {# ... #}
# comments). Every variable must use the double-brace expression form; a
# single-brace name such as {system_prompt} is plain text to Jinja2 and would
# be emitted literally instead of being substituted.
#
# Variables read by the template:
#   system_prompt        - system instructions placed at the top
#   output_format_str    - output-format instructions placed after them
#   conversation_history - optional mapping iterated with .items(); each value
#                          exposes user_query.query_str and
#                          assistant_response.response_str
#   contexts             - optional iterable of items exposing
#                          meta_data.get('file_path', ...) and text
#   input_str            - the user's prompt
RAG_TEMPLATE = r"""<START_OF_SYS_PROMPT>
{{system_prompt}}
{{output_format_str}}
<END_OF_SYS_PROMPT>
{# OrderedDict of DialogTurn #}
{% if conversation_history %}
<START_OF_CONVERSATION_HISTORY>
{% for key, dialog_turn in conversation_history.items() %}
{{key}}.
User: {{dialog_turn.user_query.query_str}}
You: {{dialog_turn.assistant_response.response_str}}
{% endfor %}
<END_OF_CONVERSATION_HISTORY>
{% endif %}
{% if contexts %}
<START_OF_CONTEXT>
{% for context in contexts %}
{{loop.index}}.
File Path: {{context.meta_data.get('file_path', 'unknown')}}
Content: {{context.text}}
{% endfor %}
<END_OF_CONTEXT>
{% endif %}
<START_OF_USER_PROMPT>
{{input_str}}
<END_OF_USER_PROMPT>
"""

# System prompts for simple chat
#
# First Deep Research iteration: asks the model to open with "## Research Plan",
# give initial findings, and close with "## Next Steps" without concluding.
# Placeholders: {repo_type}, {repo_url}, {repo_name}, {language_name}
# (single-brace form; presumably filled with str.format - confirm at the call site).
DEEP_RESEARCH_FIRST_ITERATION_PROMPT = """<role>
You are an expert code analyst examining the {repo_type} repository: {repo_url} ({repo_name}).
You are conducting a multi-turn Deep Research process to thoroughly investigate the specific topic in the user's query.
Your goal is to provide detailed, focused information EXCLUSIVELY about this topic.
IMPORTANT:You MUST respond in {language_name} language.
</role>

<guidelines>
- This is the first iteration of a multi-turn research process focused EXCLUSIVELY on the user's query
- Start your response with "## Research Plan"
- Outline your approach to investigating this specific topic
- If the topic is about a specific file or feature (like "Dockerfile"), focus ONLY on that file or feature
- Clearly state the specific topic you're researching to maintain focus throughout all iterations
- Identify the key aspects you'll need to research
- Provide initial findings based on the information available
- End with "## Next Steps" indicating what you'll investigate in the next iteration
- Do NOT provide a final conclusion yet - this is just the beginning of the research
- Do NOT include general repository information unless directly relevant to the query
- Focus EXCLUSIVELY on the specific topic being researched - do not drift to related topics
- Your research MUST directly address the original question
- NEVER respond with just "Continue the research" as an answer - always provide substantive research findings
- Remember that this topic will be maintained across all research iterations
</guidelines>

<style>
- Be concise but thorough
- Use markdown formatting to improve readability
- Cite specific files and code sections when relevant
</style>"""

# Final Deep Research iteration: asks the model to synthesize the earlier
# iterations from the conversation history and open with "## Final Conclusion".
# Placeholders: {repo_type}, {repo_url}, {repo_name}, {language_name}
# (single-brace form; presumably filled with str.format - confirm at the call site).
DEEP_RESEARCH_FINAL_ITERATION_PROMPT = """<role>
You are an expert code analyst examining the {repo_type} repository: {repo_url} ({repo_name}).
You are in the final iteration of a Deep Research process focused EXCLUSIVELY on the latest user query.
Your goal is to synthesize all previous findings and provide a comprehensive conclusion that directly addresses this specific topic and ONLY this topic.
IMPORTANT:You MUST respond in {language_name} language.
</role>

<guidelines>
- This is the final iteration of the research process
- CAREFULLY review the entire conversation history to understand all previous findings
- Synthesize ALL findings from previous iterations into a comprehensive conclusion
- Start with "## Final Conclusion"
- Your conclusion MUST directly address the original question
- Stay STRICTLY focused on the specific topic - do not drift to related topics
- Include specific code references and implementation details related to the topic
- Highlight the most important discoveries and insights about this specific functionality
- Provide a complete and definitive answer to the original question
- Do NOT include general repository information unless directly relevant to the query
- Focus exclusively on the specific topic being researched
- NEVER respond with "Continue the research" as an answer - always provide a complete conclusion
- If the topic is about a specific file or feature (like "Dockerfile"), focus ONLY on that file or feature
- Ensure your conclusion builds on and references key findings from previous iterations
</guidelines>

<style>
- Be concise but thorough
- Use markdown formatting to improve readability
- Cite specific files and code sections when relevant
- Structure your response with clear headings
- End with actionable insights or recommendations when appropriate
</style>"""

# Intermediate Deep Research iteration: asks the model to build on earlier
# iterations and open with "## Research Update <iteration number>".
# Placeholders: {repo_type}, {repo_url}, {repo_name}, {research_iteration},
# {language_name}. All use the single-brace form so that str.format-style
# substitution fills them; a doubled brace would survive formatting as the
# literal text "{research_iteration}" instead of the iteration number.
DEEP_RESEARCH_INTERMEDIATE_ITERATION_PROMPT = """<role>
You are an expert code analyst examining the {repo_type} repository: {repo_url} ({repo_name}).
You are currently in iteration {research_iteration} of a Deep Research process focused EXCLUSIVELY on the latest user query.
Your goal is to build upon previous research iterations and go deeper into this specific topic without deviating from it.
IMPORTANT:You MUST respond in {language_name} language.
</role>

<guidelines>
- CAREFULLY review the conversation history to understand what has been researched so far
- Your response MUST build on previous research iterations - do not repeat information already covered
- Identify gaps or areas that need further exploration related to this specific topic
- Focus on one specific aspect that needs deeper investigation in this iteration
- Start your response with "## Research Update {research_iteration}"
- Clearly explain what you're investigating in this iteration
- Provide new insights that weren't covered in previous iterations
- If this is iteration 3, prepare for a final conclusion in the next iteration
- Do NOT include general repository information unless directly relevant to the query
- Focus EXCLUSIVELY on the specific topic being researched - do not drift to related topics
- If the topic is about a specific file or feature (like "Dockerfile"), focus ONLY on that file or feature
- NEVER respond with just "Continue the research" as an answer - always provide substantive research findings
- Your research MUST directly address the original question
- Maintain continuity with previous research iterations - this is a continuous investigation
</guidelines>

<style>
- Be concise but thorough
- Focus on providing new information, not repeating what's already been covered
- Use markdown formatting to improve readability
- Cite specific files and code sections when relevant
</style>"""

# System prompt for plain (non Deep Research) chat: asks for a direct answer
# with no preamble, no leading markdown header, and no wrapping code fences.
# Placeholders: {repo_type}, {repo_url}, {repo_name}, {language_name}
# (single-brace form; presumably filled with str.format - confirm at the call site).
# NOTE(review): the <role> section requires {language_name}, while the last
# guideline asks for the language of the user's query - confirm which should win.
SIMPLE_CHAT_SYSTEM_PROMPT = """<role>
You are an expert code analyst examining the {repo_type} repository: {repo_url} ({repo_name}).
You provide direct, concise, and accurate information about code repositories.
You NEVER start responses with markdown headers or code fences.
IMPORTANT:You MUST respond in {language_name} language.
</role>

<guidelines>
- Answer the user's question directly without ANY preamble or filler phrases
- DO NOT include any rationale, explanation, or extra comments.
- DO NOT start with preambles like "Okay, here's a breakdown" or "Here's an explanation"
- DO NOT start with markdown headers like "## Analysis of..." or any file path references
- DO NOT start with ```markdown code fences
- DO NOT end your response with ``` closing fences
- DO NOT start by repeating or acknowledging the question
- JUST START with the direct answer to the question

<example_of_what_not_to_do>
```markdown
## Analysis of `adalflow/adalflow/datasets/gsm8k.py`

This file contains...
```
</example_of_what_not_to_do>

- Format your response with proper markdown including headings, lists, and code blocks WITHIN your answer
- For code analysis, organize your response with clear sections
- Think step by step and structure your answer logically
- Start with the most relevant information that directly addresses the user's query
- Be precise and technical when discussing code
- Your response language should be in the same language as the user's query
</guidelines>

<style>
- Use concise, direct language
- Prioritize accuracy over verbosity
- When showing code, include line numbers and file paths when relevant
- Use markdown formatting to improve readability
</style>"""


================================================
FILE: api/pyproject.toml
================================================
[project]
name = "open-deepwiki-api"
version = "1.0.0"
description = "Backend API for DeepWiki, providing smart code analysis and AI-powered documentation generation."
license = {text = "MIT License"}

[tool.poetry]
package-mode = false

[tool.poetry.dependencies]
python = "^3.11"
fastapi = ">=0.95.0"
uvicorn = { extras = ["standard"], version = ">=0.21.1" }
pydantic = ">=2.0.0"
google-generativeai = ">=0.3.0"
tiktoken = ">=0.5.0"
adalflow = ">=0.1.0"
numpy = ">=1.24.0"
faiss-cpu = ">=1.7.4"
langid = ">=1.1.6"
requests = ">=2.28.0"
jinja2 = ">=3.1.2"
python-dotenv = ">=1.0.0"
openai = ">=1.76.2"
ollama = ">=0.4.8"
aiohttp = ">=3.8.4"
boto3 = ">=1.34.0"
websockets = ">=11.0.3"
azure-identity = ">=1.12.0"
azure-core = ">=1.24.0"


[build-system]
requires = ["poetry-core>=2.0.0,<3.0.0"]
build-backend = "poetry.core.masonry.api"

[tool.poetry.group.dev.dependencies]
pytest = ">=7.0.0"


================================================
FILE: api/rag.py
================================================
import logging
import weakref
import re
from dataclasses import dataclass
from typing import Any, List, Tuple, Dict
from uuid import uuid4

import adalflow as adal

from api.tools.embedder import get_embedder
from api.prompts import RAG_SYSTEM_PROMPT as system_prompt, RAG_TEMPLATE

# Create our own implementation of the conversation classes
@dataclass
class UserQuery:
    """The user's side of one dialog turn."""
    # Text of the user's query.
    query_str: str

@dataclass
class AssistantResponse:
    """The assistant's side of one dialog turn."""
    # Text of the assistant's response.
    response_str: str

@dataclass
class DialogTurn:
    """One exchange in a conversation: a user query paired with the assistant's response."""
    # Identifier of the turn.
    id: str
    # What the user asked in this turn.
    user_query: UserQuery
    # What the assistant answered in this turn.
    assistant_response: AssistantResponse

class CustomConversation:
    """Minimal conversation container holding dialog turns in a plain list.

    Used in place of the stock Conversation class to avoid its
    "list assignment index out of range" error.
    """

    def __init__(self):
        # Turns are stored in the order they are appended.
        self.dialog_turns = []

    def append_dialog_turn(self, dialog_turn):
        """Append ``dialog_turn``, creating the backing list first if it is missing."""
        try:
            turns = self.dialog_turns
        except AttributeError:
            # The attribute can be absent when __init__ was bypassed.
            turns = self.dialog_turns = []
        turns.append(dialog_turn)

# Import other adalflow components
from adalflow.components.retriever.faiss_retriever import FAISSRetriever
from api.config import configs
from api.data_pipeline import DatabaseManager

# Configure logging
logger = logging.getLogger(__name__)

# Maximum token limit for embedding models
MAX_INPUT_TOKENS = 7500  # Safe threshold below 8192 token limit

class Memory(adal.core.component.DataComponent):
    """Conversation store backed by a flat list of dialog turns."""

    def __init__(self):
        super().__init__()
        # CustomConversation stands in for the original Conversation class.
        self.current_conversation = CustomConversation()

    def call(self) -> Dict:
        """Collect the stored dialog turns into a fresh dict keyed by turn ID.

        Turns without an ``id`` attribute (or whose ``id`` is None) are logged
        and left out. If reading the conversation fails, the conversation is
        replaced with an empty one and whatever was collected so far is
        returned.

        Returns:
            Dict mapping turn ID to the turn object.
        """
        history = {}
        try:
            conversation = self.current_conversation
            if not hasattr(conversation, 'dialog_turns'):
                logger.info("No dialog_turns attribute in current_conversation")
                # Give the conversation an empty list so later calls find it.
                conversation.dialog_turns = []
            elif not conversation.dialog_turns:
                logger.info("Dialog turns list exists but is empty")
            else:
                turns = conversation.dialog_turns
                logger.info(f"Memory content: {len(turns)} turns")
                for position, turn in enumerate(turns, start=1):
                    turn_id = getattr(turn, 'id', None)
                    if turn_id is None:
                        logger.warning(f"Skipping invalid turn object in memory: {turn}")
                        continue
                    history[turn_id] = turn
                    logger.info(f"Added turn {position} with ID {turn_id} to memory")
        except Exception as e:
            logger.error(f"Error accessing dialog turns: {str(e)}")
            # Best-effort recovery: start over with an empty conversation.
            try:
                self.current_conversation = CustomConversation()
                logger.info("Recovered by creating new conversation")
            except Exception as e2:
                logger.error(f"Failed to recover: {str(e2)}")

        logger.info(f"Returning {len(history)} dialog turns from memory")
        return history

    def add_dialog_turn(self, user_query: str, assistant_response: str) -> bool:
        """
        Record one user/assistant exchange in the conversation history.

        If anything fails, the conversation is replaced by a new one holding
        only this turn.

        Args:
            user_query: The user's query
            assistant_response: The assistant's response

        Returns:
            bool: True if the turn was stored (directly or after recovery),
            False if recovery failed as well
        """
        def make_turn() -> DialogTurn:
            # Every turn gets its own random UUID string as ID.
            return DialogTurn(
                id=str(uuid4()),
                user_query=UserQuery(query_str=user_query),
                assistant_response=AssistantResponse(response_str=assistant_response),
            )

        try:
            new_turn = make_turn()

            # Replace the conversation object if it lacks the expected method.
            if not hasattr(self.current_conversation, 'append_dialog_turn'):
                logger.warning("current_conversation does not have append_dialog_turn method, creating new one")
                self.current_conversation = CustomConversation()

            # Make sure there is a list to append to.
            if not hasattr(self.current_conversation, 'dialog_turns'):
                logger.warning("dialog_turns not found, initializing empty list")
                self.current_conversation.dialog_turns = []

            turns = self.current_conversation.dialog_turns
            turns.append(new_turn)
            logger.info(f"Successfully added dialog turn, now have {len(turns)} turns")
            return True

        except Exception as e:
            logger.error(f"Error adding dialog turn: {str(e)}")
            # Recovery path: discard the old conversation and keep just this turn.
            try:
                self.current_conversation = CustomConversation()
                replacement_turn = make_turn()
                self.current_conversation.dialog_turns.append(replacement_turn)
                logger.info("Recovered from error by creating new conversation")
                return True
            except Exception as e2:
                logger.error(f"Failed to recover from error: {str(e2)}")
                return False


from dataclasses import dataclass, field

# Structured answer type handed to adal.DataClassParser in RAG.__init__.
# The "desc" metadata strings are runtime text; keep them unchanged.
@dataclass
class RAGAnswer(adal.DataClass):
    # Reasoning behind the answer.
    rationale: str = field(default="", metadata={"desc": "Chain of thoughts for the answer."})
    # The markdown answer itself.
    answer: str = field(default="", metadata={"desc": "Answer to the user query, formatted in markdown for beautiful rendering with react-markdown. DO NOT include ``` triple backticks fences at the beginning or end of your answer."})

    # Fields listed as the output of this data class, in this order.
    __output_fields__ = ["rationale", "answer"]

class RAG(adal.Component):
    """Retrieval component bound to a single repository.

    Construction wires up the embedder, conversation memory, database manager
    and generator. Call ``prepare_retriever(repo_url_or_path)`` before issuing
    queries, and again to switch to another repository: ``call`` relies on the
    retriever created there.
    """

    def __init__(self, provider="google", model=None, use_s3: bool = False):  # noqa: F841 - use_s3 is kept for compatibility
        """
        Initialize the RAG component.

        Args:
            provider: Model provider to use (google, openai, openrouter, ollama)
            model: Model name to use with the provider
            use_s3: Unused; accepted only so existing callers keep working

        Raises:
            Exception: If the Ollama embedder is configured with a model name
                and check_ollama_model_exists reports that model as missing.
        """
        super().__init__()

        self.provider = provider
        self.model = model

        # Import the helper functions
        from api.config import get_embedder_config, get_embedder_type

        # Determine embedder type based on current configuration
        self.embedder_type = get_embedder_type()
        self.is_ollama_embedder = (self.embedder_type == 'ollama')  # Backward compatibility

        # Check if Ollama model exists before proceeding
        if self.is_ollama_embedder:
            from api.ollama_patch import check_ollama_model_exists

            embedder_config = get_embedder_config()
            if embedder_config and embedder_config.get("model_kwargs", {}).get("model"):
                model_name = embedder_config["model_kwargs"]["model"]
                if not check_ollama_model_exists(model_name):
                    raise Exception(f"Ollama model '{model_name}' not found. Please run 'ollama pull {model_name}' to install it.")

        # Initialize components
        self.memory = Memory()
        self.embedder = get_embedder(embedder_type=self.embedder_type)

        # The closure below only holds a weak reference to this instance, so
        # storing it on the instance does not create a strong reference cycle.
        self_weakref = weakref.ref(self)
        # Patch: ensure query embedding is always single string for Ollama
        def single_string_embedder(query):
            # Accepts either a string or a list, always returns embedding for a single string
            if isinstance(query, list):
                if len(query) != 1:
                    raise ValueError("Ollama embedder only supports a single string")
                query = query[0]
            instance = self_weakref()
            assert instance is not None, "RAG instance is no longer available, but the query embedder was called."
            return instance.embedder(input=query)

        # Use single string embedder for Ollama, regular embedder for others
        self.query_embedder = single_string_embedder if self.is_ollama_embedder else self.embedder

        self.initialize_db_manager()

        # Set up the output parser
        data_parser = adal.DataClassParser(data_class=RAGAnswer, return_data_class=True)

        # Format instructions to ensure proper output structure
        format_instructions = data_parser.get_output_format_str() + """

IMPORTANT FORMATTING RULES:
1. DO NOT include your thinking or reasoning process in the output
2. Provide only the final, polished answer
3. DO NOT include ```markdown fences at the beginning or end of your answer
4. DO NOT wrap your response in any kind of fences
5. Start your response directly with the content
6. The content will already be rendered as markdown
7. Do not use backslashes before special characters like [ ] { } in your answer
8. When listing tags or similar items, write them as plain text without escape characters
9. For pipe characters (|) in text, write them directly without escaping them"""

        # Get model configuration based on provider and model
        from api.config import get_model_config
        generator_config = get_model_config(self.provider, self.model)

        # Set up the main generator
        # NOTE(review): self.memory() is evaluated once, here; Memory.call
        # builds a new dict on each call, so turns added later presumably do
        # not show up in this prompt kwarg - confirm that is intended.
        self.generator = adal.Generator(
            template=RAG_TEMPLATE,
            prompt_kwargs={
                "output_format_str": format_instructions,
                "conversation_history": self.memory(),
                "system_prompt": system_prompt,
                "contexts": None,
            },
            model_client=generator_config["model_client"](),
            model_kwargs=generator_config["model_kwargs"],
            output_processors=data_parser,
        )


    def initialize_db_manager(self):
        """Create a fresh DatabaseManager and clear the loaded documents."""
        self.db_manager = DatabaseManager()
        self.transformed_docs = []

    @staticmethod
    def _embedding_size(vector: Any) -> Any:
        """Return the dimensionality of an embedding vector, or None if its type is unsupported."""
        if isinstance(vector, list):
            return len(vector)
        if hasattr(vector, 'shape'):
            # Last axis; for a 1-D array this is the same as shape[0].
            return vector.shape[-1]
        if hasattr(vector, '__len__'):
            return len(vector)
        return None

    @staticmethod
    def _document_label(doc: Any, index: int) -> str:
        """Return the document's file path for log messages, or ``document_<index>`` when unavailable."""
        meta_data = getattr(doc, 'meta_data', None)
        # meta_data may be absent or None; never call .get on a non-dict.
        if isinstance(meta_data, dict):
            return meta_data.get('file_path', f'document_{index}')
        return f'document_{index}'

    def _validate_and_filter_embeddings(self, documents: List) -> List:
        """
        Validate embeddings and filter out documents with invalid or mismatched embedding sizes.

        The most frequent embedding size among the documents is treated as the
        correct one; every document with a different, empty, missing or
        unmeasurable embedding is dropped (and logged).

        Args:
            documents: List of documents with embeddings

        Returns:
            List of documents with valid embeddings of consistent size
        """
        if not documents:
            logger.warning("No documents provided for embedding validation")
            return []

        # First pass: measure every document once and count how often each size occurs.
        measured = []  # (original index, document, embedding size)
        embedding_sizes = {}
        for i, doc in enumerate(documents):
            if not hasattr(doc, 'vector') or doc.vector is None:
                logger.warning(f"Document {i} has no embedding vector, skipping")
                continue

            try:
                embedding_size = self._embedding_size(doc.vector)
                if embedding_size is None:
                    logger.warning(f"Document {i} has invalid embedding vector type: {type(doc.vector)}, skipping")
                    continue

                if embedding_size == 0:
                    logger.warning(f"Document {i} has empty embedding vector, skipping")
                    continue

                embedding_sizes[embedding_size] = embedding_sizes.get(embedding_size, 0) + 1
                measured.append((i, doc, embedding_size))

            except Exception as e:
                logger.warning(f"Error checking embedding size for document {i}: {str(e)}, skipping")
                continue

        if not embedding_sizes:
            logger.error("No valid embeddings found in any documents")
            return []

        # Find the most common embedding size (this should be the correct one)
        target_size = max(embedding_sizes.keys(), key=lambda k: embedding_sizes[k])
        logger.info(f"Target embedding size: {target_size} (found in {embedding_sizes[target_size]} documents)")

        # Log all embedding sizes found
        for size, count in embedding_sizes.items():
            if size != target_size:
                logger.warning(f"Found {count} documents with incorrect embedding size {size}, will be filtered out")

        # Second pass: keep only the measured documents that match the target size.
        valid_documents = []
        for i, doc, embedding_size in measured:
            if embedding_size == target_size:
                valid_documents.append(doc)
            else:
                # Log which document is being filtered out
                file_path = self._document_label(doc, i)
                logger.warning(f"Filtering out document '{file_path}' due to embedding size mismatch: {embedding_size} != {target_size}")

        logger.info(f"Embedding validation complete: {len(valid_documents)}/{len(documents)} documents have valid embeddings")

        if len(valid_documents) == 0:
            logger.error("No documents with valid embeddings remain after filtering")
        elif len(valid_documents) < len(documents):
            filtered_count = len(documents) - len(valid_documents)
            logger.warning(f"Filtered out {filtered_count} documents due to embedding issues")

        return valid_documents

    def prepare_retriever(self, repo_url_or_path: str, type: str = "github", access_token: str = None,
                      excluded_dirs: List[str] = None, excluded_files: List[str] = None,
                      included_dirs: List[str] = None, included_files: List[str] = None):
        """
        Prepare the retriever for a repository.
        Will load database from local storage if available.

        Args:
            repo_url_or_path: URL or local path to the repository
            type: Repository type passed through to the database manager (default "github")
            access_token: Optional access token for private repositories
            excluded_dirs: Optional list of directories to exclude from processing
            excluded_files: Optional list of file patterns to exclude from processing
            included_dirs: Optional list of directories to include exclusively
            included_files: Optional list of file patterns to include exclusively

        Raises:
            ValueError: If no document with a valid embedding remains after validation.
            Exception: Any error raised while building the FAISS retriever is logged and re-raised.
        """
        self.initialize_db_manager()
        self.repo_url_or_path = repo_url_or_path
        self.transformed_docs = self.db_manager.prepare_database(
            repo_url_or_path,
            type,
            access_token,
            embedder_type=self.embedder_type,
            excluded_dirs=excluded_dirs,
            excluded_files=excluded_files,
            included_dirs=included_dirs,
            included_files=included_files
        )
        logger.info(f"Loaded {len(self.transformed_docs)} documents for retrieval")

        # Validate and filter embeddings to ensure consistent sizes
        self.transformed_docs = self._validate_and_filter_embeddings(self.transformed_docs)

        if not self.transformed_docs:
            raise ValueError("No valid documents with embeddings found. Cannot create retriever.")

        logger.info(f"Using {len(self.transformed_docs)} documents with valid embeddings for retrieval")

        try:
            # Use the appropriate embedder for retrieval
            retrieve_embedder = self.query_embedder if self.is_ollama_embedder else self.embedder
            self.retriever = FAISSRetriever(
                **configs["retriever"],
                embedder=retrieve_embedder,
                documents=self.transformed_docs,
                document_map_func=lambda doc: doc.vector,
            )
            logger.info("FAISS retriever created successfully")
        except Exception as e:
            logger.error(f"Error creating FAISS retriever: {str(e)}")
            # Try to provide more specific error information
            if "All embeddings should be of the same size" in str(e):
                logger.error("Embedding size validation failed. This suggests there are still inconsistent embedding sizes.")
                # Log embedding sizes for debugging
                sizes = []
                for i, doc in enumerate(self.transformed_docs[:10]):  # Check first 10 docs
                    if hasattr(doc, 'vector') and doc.vector is not None:
                        try:
                            size = self._embedding_size(doc.vector)
                            if size is None:
                                size = "unknown"
                            sizes.append(f"doc_{i}: {size}")
                        except Exception:
                            sizes.append(f"doc_{i}: error")
                logger.error(f"Sample embedding sizes: {', '.join(sizes)}")
            raise

    def call(self, query: str, language: str = "en") -> Tuple[List]:
        """
        Retrieve the documents most relevant to a query.

        Args:
            query: The user's query
            language: Not used by this method

        Returns:
            On success, the retriever's output, with ``documents`` of its first
            entry filled in from ``self.transformed_docs`` via ``doc_indices``.
            On any error (including a retriever that was never prepared), a
            ``(RAGAnswer, [])`` tuple carrying an apology message. Note that
            the first element of that tuple has no ``documents`` attribute, so
            callers indexing ``result[0].documents`` must guard against it.
        """
        try:
            retrieved_documents = self.retriever(query)

            # Fill in the documents
            retrieved_documents[0].documents = [
                self.transformed_docs[doc_index]
                for doc_index in retrieved_documents[0].doc_indices
            ]

            return retrieved_documents

        except Exception as e:
            logger.error(f"Error in RAG call: {str(e)}")

            # Create error response
            error_response = RAGAnswer(
                rationale="Error occurred while processing the query.",
                answer="I apologize, but I encountered an error while processing your question. Please try again or rephrase your question."
            )
            return error_response, []


================================================
FILE: api/simple_chat.py
================================================
import logging
import os
from typing import List, Optional
from urllib.parse import unquote

import google.generativeai as genai
from adalflow.components.model_client.ollama_client import OllamaClient
from adalflow.core.types import ModelType
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, Field

from api.config import get_model_config, configs, OPENROUTER_API_KEY, OPENAI_API_KEY, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY
from api.data_pipeline import count_tokens, get_file_content
from api.openai_client import OpenAIClient
from api.openrouter_client import OpenRouterClient
from api.bedrock_client import BedrockClient
from api.azureai_client import AzureAIClient
from api.dashscope_client import DashscopeClient
from api.rag import RAG
from api.prompts import (
    DEEP_RESEARCH_FIRST_ITERATION_PROMPT,
    DEEP_RESEARCH_FINAL_ITERATION_PROMPT,
    DEEP_RESEARCH_INTERMEDIATE_ITERATION_PROMPT,
    SIMPLE_CHAT_SYSTEM_PROMPT
)

# Configure logging
from api.logging_config import setup_logging

# Logging is configured at import time, before the module logger is created.
setup_logging()
logger = logging.getLogger(__name__)


# Initialize FastAPI app
app = FastAPI(
    title="Simple Chat API",
    description="Simplified API for streaming chat completions"
)

# Configure CORS
# NOTE(review): FastAPI's CORS documentation says wildcard origins/methods/
# headers must not be combined with allow_credentials=True - confirm whether
# credentials are needed, or list the allowed origins explicitly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allows all origins
    allow_credentials=True,
    allow_methods=["*"],  # Allows all methods
    allow_headers=["*"],  # Allows all headers
)

# Models for the API
# One entry of the chat transcript carried in ChatCompletionRequest.messages.
class ChatMessage(BaseModel):
    role: str  # 'user' or 'assistant'
    content: str  # text of the message

class ChatCompletionRequest(BaseModel):
    """
    Model for requesting a chat completion.
    """
    # Repository to query and the conversation so far (last message is the current question).
    repo_url: str = Field(..., description="URL of the repository to query")
    messages: List[ChatMessage] = Field(..., description="List of chat messages")
    filePath: Optional[str] = Field(None, description="Optional path to a file in the repository to include in the prompt")
    token: Optional[str] = Field(None, description="Personal access token for private repositories")
    type: Optional[str] = Field("github", description="Type of repository (e.g., 'github', 'gitlab', 'bitbucket')")

    # model parameters
    provider: str = Field("google", description="Model provider (google, openai, openrouter, ollama, bedrock, azure, dashscope)")
    model: Optional[str] = Field(None, description="Model name for the specified provider")

    language: Optional[str] = Field("en", description="Language for content generation (e.g., 'en', 'ja', 'zh', 'es', 'kr', 'vi')")
    # File filters: the request handler splits these values on '\n' and
    # URL-unquotes each entry, so the descriptions document that format.
    excluded_dirs: Optional[str] = Field(None, description="Newline-separated list of directories to exclude from processing (entries may be URL-encoded)")
    excluded_files: Optional[str] = Field(None, description="Newline-separated list of file patterns to exclude from processing (entries may be URL-encoded)")
    included_dirs: Optional[str] = Field(None, description="Newline-separated list of directories to include exclusively (entries may be URL-encoded)")
    included_files: Optional[str] = Field(None, description="Newline-separated list of file patterns to include exclusively (entries may be URL-encoded)")

@app.post("/chat/completions/stream")
async def chat_completions_stream(request: ChatCompletionRequest):
    """Stream a chat completion response directly using Google Generative AI"""
    try:
        # Check if request contains very large input
        input_too_large = False
        if request.messages and len(request.messages) > 0:
            last_message = request.messages[-1]
            if hasattr(last_message, 'content') and last_message.content:
                tokens = count_tokens(last_message.content, request.provider == "ollama")
                logger.info(f"Request size: {tokens} tokens")
                if tokens > 8000:
                    logger.warning(f"Request exceeds recommended token limit ({tokens} > 7500)")
                    input_too_large = True

        # Create a new RAG instance for this request
        try:
            request_rag = RAG(provider=request.provider, model=request.model)

            # Extract custom file filter parameters if provided
            excluded_dirs = None
            excluded_files = None
            included_dirs = None
            included_files = None

            if request.excluded_dirs:
                excluded_dirs = [unquote(dir_path) for dir_path in request.excluded_dirs.split('\n') if dir_path.strip()]
                logger.info(f"Using custom excluded directories: {excluded_dirs}")
            if request.excluded_files:
                excluded_files = [unquote(file_pattern) for file_pattern in request.excluded_files.split('\n') if file_pattern.strip()]
                logger.info(f"Using custom excluded files: {excluded_files}")
            if request.included_dirs:
                included_dirs = [unquote(dir_path) for dir_path in request.included_dirs.split('\n') if dir_path.strip()]
                logger.info(f"Using custom included directories: {included_dirs}")
            if request.included_files:
                included_files = [unquote(file_pattern) for file_pattern in request.included_files.split('\n') if file_pattern.strip()]
                logger.info(f"Using custom included files: {included_files}")

            request_rag.prepare_retriever(request.repo_url, request.type, request.token, excluded_dirs, excluded_files, included_dirs, included_files)
            logger.info(f"Retriever prepared for {request.repo_url}")
        except ValueError as e:
            if "No valid documents with embeddings found" in str(e):
                logger.error(f"No valid embeddings found: {str(e)}")
                raise HTTPException(status_code=500, detail="No valid document embeddings found. This may be due to embedding size inconsistencies or API errors during document processing. Please try again or check your repository content.")
            else:
                logger.error(f"ValueError preparing retriever: {str(e)}")
                raise HTTPException(status_code=500, detail=f"Error preparing retriever: {str(e)}")
        except Exception as e:
            logger.error(f"Error preparing retriever: {str(e)}")
            # Check for specific embedding-related errors
            if "All embeddings should be of the same size" in str(e):
                raise HTTPException(status_code=500, detail="Inconsistent embedding sizes detected. Some documents may have failed to embed properly. Please try again.")
            else:
                raise HTTPException(status_code=500, detail=f"Error preparing retriever: {str(e)}")

        # Validate request
        if not request.messages or len(request.messages) == 0:
            raise HTTPException(status_code=400, detail="No messages provided")

        last_message = request.messages[-1]
        if last_message.role != "user":
            raise HTTPException(status_code=400, detail="Last message must be from the user")

        # Process previous messages to build conversation history
        for i in range(0, len(request.messages) - 1, 2):
            if i + 1 < len(request.messages):
                user_msg = request.messages[i]
                assistant_msg = request.messages[i + 1]

                if user_msg.role == "user" and assistant_msg.role == "assistant":
                    request_rag.memory.add_dialog_turn(
                        user_query=user_msg.content,
                        assistant_response=assistant_msg.content
                    )

        # Check if this is a Deep Research request
        is_deep_research = False
        research_iteration = 1

        # Process messages to detect Deep Research requests
        for msg in request.messages:
            if hasattr(msg, 'content') and msg.content and "[DEEP RESEARCH]" in msg.content:
                is_deep_research = True
                # Only remove the tag from the last message
                if msg == request.messages[-1]:
                    # Remove the Deep Research tag
                    msg.content = msg.content.replace("[DEEP RESEARCH]", "").strip()

        # Count research iterations if this is a Deep Research request
        if is_deep_research:
            research_iteration = sum(1 for msg in request.messages if msg.role == 'assistant') + 1
            logger.info(f"Deep Research request detected - iteration {research_iteration}")

            # Check if this is a continuation request
            if "continue" in last_message.content.lower() and "research" in last_message.content.lower():
                # Find the original topic from the first user message
                original_topic = None
                for msg in request.messages:
                    if msg.role == "user" and "continue" not in msg.content.lower():
                        original_topic = msg.content.replace("[DEEP RESEARCH]", "").strip()
                        logger.info(f"Found original research topic: {original_topic}")
                        break

                if original_topic:
                    # Replace the continuation message with the original topic
                    last_message.content = original_topic
                    logger.info(f"Using original topic for research: {original_topic}")

        # Get the query from the last message
        query = last_message.content

        # Only retrieve documents if input is not too large
        context_text = ""
        retrieved_documents = None

        if not input_too_large:
            try:
                # If filePath exists, modify the query for RAG to focus on the file
                rag_query = query
                if request.filePath:
                    # Use the file path to get relevant context about the file
                    rag_query = f"Contexts related to {request.filePath}"
                    logger.info(f"Modified RAG query to focus on file: {request.filePath}")

                # Try to perform RAG retrieval
                try:
                    # This will use the actual RAG implementation
                    retrieved_documents = request_rag(rag_query, language=request.language)

                    if retrieved_documents and retrieved_documents[0].documents:
                        # Format context for the prompt in a more structured way
                        documents = retrieved_documents[0].documents
                        logger.info(f"Retrieved {len(documents)} documents")

                        # Group documents by file path
                        docs_by_file = {}
                        for doc in documents:
                            file_path = doc.meta_data.get('file_path', 'unknown')
                            if file_path not in docs_by_file:
                                docs_by_file[file_path] = []
                            docs_by_file[file_path].append(doc)

                        # Format context text with file path grouping
                        context_parts = []
                        for file_path, docs in docs_by_file.items():
                            # Add file header with metadata
                            header = f"## File Path: {file_path}\n\n"
                            # Add document content
                            content = "\n\n".join([doc.text for doc in docs])

                            context_parts.append(f"{header}{content}")

                        # Join all parts with clear separation
                        context_text = "\n\n" + "-" * 10 + "\n\n".join(context_parts)
                    else:
                        logger.warning("No documents retrieved from RAG")
                except Exception as e:
                    logger.error(f"Error in RAG retrieval: {str(e)}")
                    # Continue without RAG if there's an error

            except Exception as e:
                logger.error(f"Error retrieving documents: {str(e)}")
                context_text = ""

        # Get repository information
        repo_url = request.repo_url
        repo_name = repo_url.split("/")[-1] if "/" in repo_url else repo_url

        # Determine repository type
        repo_type = request.type

        # Get language information
        language_code = request.language or configs["lang_config"]["default"]
        supported_langs = configs["lang_config"]["supported_languages"]
        language_name = supported_langs.get(language_code, "English")

        # Create system prompt
        if is_deep_research:
            # Check if this is the first iteration
            is_first_iteration = research_iteration == 1

            # Check if this is the final iteration
            is_final_iteration = research_iteration >= 5

            if is_first_iteration:
                system_prompt = DEEP_RESEARCH_FIRST_ITERATION_PROMPT.format(
                    repo_type=repo_type,
                    repo_url=repo_url,
                    repo_name=repo_name,
                    language_name=language_name
                )
            elif is_final_iteration:
                system_prompt = DEEP_RESEARCH_FINAL_ITERATION_PROMPT.format(
                    repo_type=repo_type,
                    repo_url=repo_url,
                    repo_name=repo_name,
                    research_iteration=research_iteration,
                    language_name=language_name
                )
            else:
                system_prompt = DEEP_RESEARCH_INTERMEDIATE_ITERATION_PROMPT.format(
                    repo_type=repo_type,
                    repo_url=repo_url,
                    repo_name=repo_name,
                    research_iteration=research_iteration,
                    language_name=language_name
                )
        else:
            system_prompt = SIMPLE_CHAT_SYSTEM_PROMPT.format(
                repo_type=repo_type,
                repo_url=repo_url,
                repo_name=repo_name,
                language_name=language_name
            )

        # Fetch file content if provided
        file_content = ""
        if request.filePath:
            try:
                file_content = get_file_content(request.repo_url, request.filePath, request.type, request.token)
                logger.info(f"Successfully retrieved content for file: {request.filePath}")
            except Exception as e:
                logger.error(f"Error retrieving file content: {str(e)}")
                # Continue without file content if there's an error

        # Format conversation history
        conversation_history = ""
        for turn_id, turn in request_rag.memory().items():
            if not isinstance(turn_id, int) and hasattr(turn, 'user_query') and hasattr(turn, 'assistant_response'):
                conversation_history += f"<turn>\n<user>{turn.user_query.query_str}</user>\n<assistant>{turn.assistant_response.response_str}</assistant>\n</turn>\n"

        # Create the prompt with context
        prompt = f"/no_think {system_prompt}\n\n"

        if conversation_history:
            prompt += f"<conversation_history>\n{conversation_history}</conversation_history>\n\n"

        # Check if filePath is provided and fetch file content if it exists
        if file_content:
            # Add file content to the prompt after conversation history
            prompt += f"<currentFileContent path=\"{request.filePath}\">\n{file_content}\n</currentFileContent>\n\n"

        # Only include context if it's not empty
        CONTEXT_START = "<START_OF_CONTEXT>"
        CONTEXT_END = "<END_OF_CONTEXT>"
        if context_text.strip():
            prompt += f"{CONTEXT_START}\n{context_text}\n{CONTEXT_END}\n\n"
        else:
            # Add a note that we're skipping RAG due to size constraints or because it's the isolated API
            logger.info("No context available from RAG")
            prompt += "<note>Answering without retrieval augmentation.</note>\n\n"

        prompt += f"<query>\n{query}\n</query>\n\nAssistant: "

        model_config = get_model_config(request.provider, request.model)["model_kwargs"]

        if request.provider == "ollama":
            prompt += " /no_think"

            model = OllamaClient()
            model_kwargs = {
                "model": model_config["model"],
                "stream": True,
                "options": {
                    "temperature": model_config["temperature"],
                    "top_p": model_config["top_p"],
                    "num_ctx": model_config["num_ctx"]
                }
            }

            api_kwargs = model.convert_inputs_to_api_kwargs(
                input=prompt,
                model_kwargs=model_kwargs,
                model_type=ModelType.LLM
            )
        elif request.provider == "openrouter":
            logger.info(f"Using OpenRouter with model: {request.model}")

            # Check if OpenRouter API key is set
            if not OPENROUTER_API_KEY:
                logger.warning("OPENROUTER_API_KEY not configured, but continuing with request")
                # We'll let the OpenRouterClient handle this and return a friendly error message

            model = OpenRouterClient()
            model_kwargs = {
                "model": request.model,
                "stream": True,
                "temperature": model_config["temperature"]
            }
            # Only add top_p if it exists in the model config
            if "top_p" in model_config:
                model_kwargs["top_p"] = model_config["top_p"]

            api_kwargs = model.convert_inputs_to_api_kwargs(
                input=prompt,
                model_kwargs=model_kwargs,
                model_type=ModelType.LLM
            )
        elif request.provider == "openai":
            logger.info(f"Using Openai protocol with model: {request.model}")

            # Check if an API key is set for Openai
            if not OPENAI_API_KEY:
                logger.warning("OPENAI_API_KEY not configured, but continuing with request")
                # We'll let the OpenAIClient handle this and return an error message

            # Initialize Openai client
            model = OpenAIClient()
            model_kwargs = {
                "model": request.model,
                "stream": True,
                "temperature": model_config["temperature"]
            }
            # Only add top_p if it exists in the model config
            if "top_p" in model_config:
                model_kwargs["top_p"] = model_config["top_p"]

            api_kwargs = model.convert_inputs_to_api_kwargs(
                input=prompt,
                model_kwargs=model_kwargs,
                model_type=ModelType.LLM
            )
        elif request.provider == "bedrock":
            logger.info(f"Using AWS Bedrock with model: {request.model}")

            # Check if AWS credentials are set
            if not AWS_ACCESS_KEY_ID or not AWS_SECRET_ACCESS_KEY:
                logger.warning("AWS_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY not configured, but continuing with request")
                # We'll let the BedrockClient handle this and return an error message

            # Initialize Bedrock client
            model = BedrockClient()
            model_kwargs = {
                "model": request.model,
                "temperature": model_config["temperature"],
                "top_p": model_config["top_p"]
            }

            api_kwargs = model.convert_inputs_to_api_kwargs(
                input=prompt,
                model_kwargs=model_kwargs,
                model_type=ModelType.LLM
            )
        elif request.provider == "azure":
            logger.info(f"Using Azure AI with model: {request.model}")

            # Initialize Azure AI client
            model = AzureAIClient()
            model_kwargs = {
                "model": request.model,
                "stream": True,
                "temperature": model_config["temperature"],
                "top_p": model_config["top_p"]
            }

            api_kwargs = model.convert_inputs_to_api_kwargs(
                input=prompt,
                model_kwargs=model_kwargs,
                model_type=ModelType.LLM
            )
        elif request.provider == "dashscope":
            logger.info(f"Using Dashscope with model: {request.model}")

            model = DashscopeClient()
            model_kwargs = {
                "model": request.model,
                "stream": True,
                "temperature": model_config["temperature"],
                "top_p": model_config["top_p"],
            }

            api_kwargs = model.convert_inputs_to_api_kwargs(
                input=prompt,
                model_kwargs=model_kwargs,
                model_type=ModelType.LLM,
            )
        else:
            # Initialize Google Generative AI model (default provider)
            model = genai.GenerativeModel(
                model_name=model_config["model"],
                generation_config={
                    "temperature": model_config["temperature"],
                    "top_p": model_config["top_p"],
                    "top_k": model_config["top_k"],
                },
            )

        # Create a streaming response
        async def response_stream():
            try:
                if request.provider == "ollama":
                    # Get the response and handle it properly using the previously created api_kwargs
                    response = await model.acall(api_kwargs=api_kwargs, model_type=ModelType.LLM)
                    # Handle streaming response from Ollama
                    async for chunk in response:
                        text = getattr(chunk, 'response', None) or getattr(chunk, 'text', None) or str(chunk)
                        if text and not text.startswith('model=') and not text.startswith('created_at='):
                            text = text.replace('<think>', '').replace('</think>', '')
                            yield text
                elif request.provider == "openrouter":
                    try:
                        # Get the response and handle it properly using the previously created api_kwargs
                        logger.info("Making OpenRouter API call")
                        response = await model.acall(api_kwargs=api_kwargs, model_type=ModelType.LLM)
                        # Handle streaming response from OpenRouter
                        async for chunk in response:
                            yield chunk
                    except Exception as e_openrouter:
                        logger.error(f"Error with OpenRouter API: {str(e_openrouter)}")
                        yield f"\nError with OpenRouter API: {str(e_openrouter)}\n\nPlease check that you have set the OPENROUTER_API_KEY environment variable with a valid API key."
                elif request.provider == "openai":
                    try:
                        # Get the response and handle it properly using the previously created api_kwargs
                        logger.info("Making Openai API call")
                        response = await model.acall(api_kwargs=api_kwargs, model_type=ModelType.LLM)
                        # Handle streaming response from Openai
                        async for chunk in response:
                           choices = getattr(chunk, "choices", [])
                           if len(choices) > 0:
                               delta = getattr(choices[0], "delta", None)
                               if delta is not None:
                                    text = getattr(delta, "content", None)
                                    if text is not None:
                                        yield text
                    except Exception as e_openai:
                        logger.error(f"Error with Openai API: {str(e_openai)}")
                        yield f"\nError with Openai API: {str(e_openai)}\n\nPlease check that you have set the OPENAI_API_KEY environment variable with a valid API key."
                elif request.provider == "bedrock":
                    try:
                        # Get the response and handle it properly using the previously created api_kwargs
                        logger.info("Making AWS Bedrock API call")
                        response = await model.acall(api_kwargs=api_kwargs, model_type=ModelType.LLM)
                        # Handle response from Bedrock (not streaming yet)
                        if isinstance(response, str):
                            yield response
                        else:
                            # Try to extract text from the response
                            yield str(response)
                    except Exception as e_bedrock:
                        logger.error(f"Error with AWS Bedrock API: {str(e_bedrock)}")
                        yield f"\nError with AWS Bedrock API: {str(e_bedrock)}\n\nPlease check that you have set the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables with valid credentials."
                elif request.provider == "azure":
                    try:
                        # Get the response and handle it properly using the previously created api_kwargs
                        logger.info("Making Azure AI API call")
                        response = await model.acall(api_kwargs=api_kwargs, model_type=ModelType.LLM)
                        # Handle streaming response from Azure AI
                        async for chunk in response:
                            choices = getattr(chunk, "choices", [])
                            if len(choices) > 0:
                                delta = getattr(choices[0], "delta", None)
                                if delta is not None:
                                    text = getattr(delta, "content", None)
                                    if text is not None:
                                        yield text
                    except Exception as e_azure:
                        logger.error(f"Error with Azure AI API: {str(e_azure)}")
                        yield f"\nError with Azure AI API: {str(e_azure)}\n\nPlease check that you have set the AZURE_OPENAI_API_KEY, AZURE_OPENAI_ENDPOINT, and AZURE_OPENAI_VERSION environment variables with valid values."
                elif request.provider == "dashscope":
                    try:
                        logger.info("Making Dashscope API call")
                        response = await model.acall(
                            api_kwargs=api_kwargs, model_type=ModelType.LLM
                        )
                        # DashscopeClient.acall with stream=True returns an async
                        # generator of text chunks
                        async for text in response:
                            if text:
                                yield text
                    except Exception as e_dashscope:
                        logger.error(f"Error with Dashscope API: {str(e_dashscope)}")
                        yield (
                            f"\nError with Dashscope API: {str(e_dashscope)}\n\n"
                            "Please check that you have set the DASHSCOPE_API_KEY (and optionally "
                            "DASHSCOPE_WORKSPACE_ID) environment variables with valid values."
                        )
                else:
                    # Google Generative AI (default provider)
                    response = model.generate_content(prompt, stream=True)
                    for chunk in response:
                        if hasattr(chunk, "text"):
                            yield chunk.text

            except Exception as e_outer:
                logger.error(f"Error in streaming response: {str(e_outer)}")
                error_message = str(e_outer)

                # Check for token limit errors
                if "maximum context length" in error_message or "token limit" in error_message or "too many tokens" in error_message:
                    # If we hit a token limit error, try again without context
                    logger.warning("Token limit exceeded, retrying without context")
                    try:
                        # Create a simplified prompt without context
                        simplified_prompt = f"/no_think {system_prompt}\n\n"
                        if conversation_history:
                            simplified_prompt += f"<conversation_history>\n{conversation_history}</conversation_history>\n\n"

                        # Include file content in the fallback prompt if it was retrieved
                        if request.filePath and file_content:
                            simplified_prompt += f"<currentFileContent path=\"{request.filePath}\">\n{file_content}\n</currentFileContent>\n\n"

                        simplified_prompt += "<note>Answering without retrieval augmentation due to input size constraints.</note>\n\n"
                        simplified_prompt += f"<query>\n{query}\n</query>\n\nAssistant: "

                        if request.provider == "ollama":
                            simplified_prompt += " /no_think"

                            # Create new api_kwargs with the simplified prompt
                            fallback_api_kwargs = model.convert_inputs_to_api_kwargs(
                                input=simplified_prompt,
                                model_kwargs=model_kwargs,
                                model_type=ModelType.LLM
                            )

                            # Get the response using the simplified prompt
                            fallback_response = await model.acall(api_kwargs=fallback_api_kwargs, model_type=ModelType.LLM)

                            # Handle streaming fallback_response from Ollama
                            async for chunk in fallback_response:
                                text = getattr(chunk, 'response', None) or getattr(chunk, 'text', None) or str(chunk)
                                if text and not text.startswith('model=') and not text.startswith('created_at='):
                                    text = text.replace('<think>', '').replace('</think>', '')
                                    yield text
                        elif request.provider == "openrouter":
                            try:
                                # Create new api_kwargs with the simplified prompt
                                fallback_api_kwargs = model.convert_inputs_to_api_kwargs(
                                    input=simplified_prompt,
                                    model_kwargs=model_kwargs,
                                    model_type=ModelType.LLM
                                )

                                # Get the response using the simplified prompt
                                logger.info("Making fallback OpenRouter API call")
                                fallback_response = await model.acall(api_kwargs=fallback_api_kwargs, model_type=ModelType.LLM)

                                # Handle streaming fallback_response from OpenRouter
                                async for chunk in fallback_response:
                                    yield chunk
                            except Exception as e_fallback:
                                logger.error(f"Error with OpenRouter API fallback: {str(e_fallback)}")
                                yield f"\nError with OpenRouter API fallback: {str(e_fallback)}\n\nPlease check that you have set the OPENROUTER_API_KEY environment variable with a valid API key."
                        elif request.provider == "openai":
                            try:
                                # Create new api_kwargs with the simplified prompt
                                fallback_api_kwargs = model.convert_inputs_to_api_kwargs(
                                    input=simplified_prompt,
                                    model_kwargs=model_kwargs,
                                    model_type=ModelType.LLM
                                )

                                # Get the response using the simplified prompt
                                logger.info("Making fallback Openai API call")
                                fallback_response = await model.acall(api_kwargs=fallback_api_kwargs, model_type=ModelType.LLM)

                                # Handle streaming fallback_response from Openai
                                async for chunk in fallback_response:
                                    text = chunk if isinstance(chunk, str) else getattr(chunk, 'text', str(chunk))
                                    yield text
                            except Exception as e_fallback:
                                logger.error(f"Error with Openai API fallback: {str(e_fallback)}")
                                yield f"\nError with Openai API fallback: {str(e_fallback)}\n\nPlease check that you have set the OPENAI_API_KEY environment variable with a valid API key."
                        elif request.provider == "bedrock":
                            try:
                                # Create new api_kwargs with the simplified prompt
                                fallback_api_kwargs = model.convert_inputs_to_api_kwargs(
                                    input=simplified_prompt,
                                    model_kwargs=model_kwargs,
                                    model_type=ModelType.LLM
                                )

                                # Get the response using the simplified prompt
                                logger.info("Making fallback AWS Bedrock API call")
                                fallback_response = await model.acall(api_kwargs=fallback_api_kwargs, model_type=ModelType.LLM)

                                # Handle response from Bedrock
                                if isinstance(fallback_response, str):
                                    yield fallback_response
                                else:
                                    # Try to extract text from the response
                                    yield str(fallback_response)
                            except Exception as e_fallback:
                                logger.error(f"Error with AWS Bedrock API fallback: {str(e_fallback)}")
                                yield f"\nError with AWS Bedrock API fallback: {str(e_fallback)}\n\nPlease check that you have set the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables with valid credentials."
                        elif request.provider == "azure":
                            try:
                                # Create new api_kwargs with the simplified prompt
                                fallback_api_kwargs = model.convert_inputs_to_api_kwargs(
                                    input=simplified_prompt,
                                    model_kwargs=model_kwargs,
                                    model_type=ModelType.LLM
                                )

                                # Get the response using the simplified prompt
                                logger.info("Making fallback Azure AI API call")
                                fallback_response = await model.acall(api_kwargs=fallback_api_kwargs, model_type=ModelType.LLM)

                                # Handle streaming fallback response from Azure AI
                                async for chunk in fallback_response:
                                    choices = getattr(chunk, "choices", [])
                                    if len(choices) > 0:
                                        delta = getattr(choices[0], "delta", None)
                                        if delta is not None:
                                            text = getattr(delta, "content", None)
                                            if text is not None:
                                                yield text
                            except Exception as e_fallback:
                                logger.error(f"Error with Azure AI API fallback: {str(e_fallback)}")
                                yield f"\nError with Azure AI API fallback: {str(e_fallback)}\n\nPlease check that you have set the AZURE_OPENAI_API_KEY, AZURE_OPENAI_ENDPOINT, and AZURE_OPENAI_VERSION environment variables with valid values."
                        elif request.provider == "dashscope":
                            try:
                                # Create new api_kwargs with the simplified prompt
                                fallback_api_kwargs = model.convert_inputs_to_api_kwargs(
                                    input=simplified_prompt,
                                    model_kwargs=model_kwargs,
                                    model_type=ModelType.LLM,
                                )

                                logger.info("Making fallback Dashscope API call")
                                fallback_response = await model.acall(
                                    api_kwargs=fallback_api_kwargs, model_type=ModelType.LLM
                                )

                                # DashscopeClient.acall (stream=True) returns an async
                                # generator of text chunks
                                async for text in fallback_response:
                                    if text:
                                        yield text
                            except Exception as e_fallback:
                                logger.error(
                                    f"Error with Dashscope API fallback: {str(e_fallback)}"
                                )
                                yield (
                                    f"\nError with Dashscope API fallback: {str(e_fallback)}\n\n"
                                    "Please check that you have set the DASHSCOPE_API_KEY (and optionally "
                                    "DASHSCOPE_WORKSPACE_ID) environment variables with valid values."
                                )
                        else:
                            # Google Generative AI fallback (default provider)
                            model_config = get_model_config(request.provider, request.model)
                            fallback_model = genai.GenerativeModel(
                                model_name=model_config["model_kwargs"]["model"],
                                generation_config={
                                    "temperature": model_config["model_kwargs"].get("temperature", 0.7),
                                    "top_p": model_config["model_kwargs"].get("top_p", 0.8),
                                    "top_k": model_config["model_kwargs"].get("top_k", 40),
                                },
                            )

                            fallback_response = fallback_model.generate_content(
                                simplified_prompt, stream=True
                            )
                            for chunk in fallback_response:
                                if hasattr(chunk, "text"):
                                    yield chunk.text
                    except Exception as e2:
                        logger.error(f"Error in fallback streaming response: {str(e2)}")
                        yield f"\nI apologize, but your request is too large for me to process. Please try a shorter query or break it into smaller parts."
                else:
                    # For other errors, return the error message
                    yield f"\nError: {error_message}"

        # Return streaming response
        return StreamingResponse(response_stream(), media_type="text/event-stream")

    except HTTPException:
        raise
    except Exception as e_handler:
        error_msg = f"Error in streaming chat completion: {str(e_handler)}"
        logger.error(error_msg)
        raise HTTPException(status_code=500, detail=error_msg)

@app.get("/")
async def root():
    """Root endpoint to check if the API is running"""
    # Static liveness payload; key order matches the serialized response.
    liveness_payload = {
        "status": "API is running",
        "message": "Navigate to /docs for API documentation",
    }
    return liveness_payload


================================================
FILE: api/tools/embedder.py
================================================
import adalflow as adal

from api.config import configs, get_embedder_type


def get_embedder(is_local_ollama: bool = False, use_google_embedder: bool = False, embedder_type: str = None) -> adal.Embedder:
    """Build an ``adal.Embedder`` from one of the embedder entries in ``configs``.

    The configuration entry is selected with the following precedence:

    1. ``embedder_type`` when it is truthy: 'ollama', 'google' and 'bedrock'
       select their dedicated entries; any other value (e.g. 'openai')
       selects the default ``configs["embedder"]`` entry.
    2. ``is_local_ollama`` (legacy flag) selects ``configs["embedder_ollama"]``.
    3. ``use_google_embedder`` (legacy flag) selects ``configs["embedder_google"]``.
    4. Otherwise the value returned by ``get_embedder_type()`` is mapped the
       same way as ``embedder_type``.

    Args:
        is_local_ollama: Legacy parameter for Ollama embedder
        use_google_embedder: Legacy parameter for Google embedder
        embedder_type: Direct specification of embedder type ('ollama', 'google', 'bedrock', 'openai')

    Returns:
        adal.Embedder: Configured embedder instance
    """
    # Embedder type -> key inside `configs`. Types missing from this table
    # fall back to the default entry.
    config_key_by_type = {
        "ollama": "embedder_ollama",
        "google": "embedder_google",
        "bedrock": "embedder_bedrock",
    }
    default_key = "embedder"

    if embedder_type:
        # An explicit type wins over the legacy boolean flags.
        config_key = config_key_by_type.get(embedder_type, default_key)
    elif is_local_ollama:
        config_key = "embedder_ollama"
    elif use_google_embedder:
        config_key = "embedder_google"
    else:
        # Nothing requested by the caller: follow the current configuration.
        config_key = config_key_by_type.get(get_embedder_type(), default_key)

    # Only the selected entry is looked up, so unrelated entries need not exist.
    selected = configs[config_key]

    # Instantiate the model client, forwarding constructor kwargs when present.
    client_cls = selected["model_client"]
    init_kwargs = selected["initialize_kwargs"] if "initialize_kwargs" in selected else {}
    client = client_cls(**init_kwargs)

    embedder = adal.Embedder(model_client=client, model_kwargs=selected["model_kwargs"])

    # batch_size is not a constructor parameter; attach it as an attribute.
    if "batch_size" in selected:
        embedder.batch_size = selected["batch_size"]
    return embedder


================================================
FILE: api/websocket_wiki.py
================================================
import logging
import os
from typing import List, Optional, Dict, Any
from urllib.parse import unquote

import google.generativeai as genai
from adalflow.components.model_client.ollama_client import OllamaClient
from adalflow.core.types import ModelType
from fastapi import WebSocket, WebSocketDisconnect, HTTPException
from pydantic import BaseModel, Field

from api.config import (
    get_model_config,
    configs,
    OPENROUTER_API_KEY,
    OPENAI_API_KEY,
    AWS_ACCESS_KEY_ID,
    AWS_SECRET_ACCESS_KEY,
)
from api.data_pipeline import count_tokens, get_file_content
from api.bedrock_client import BedrockClient
from api.openai_client import OpenAIClient
from api.openrouter_client import OpenRouterClient
from api.azureai_client import AzureAIClient
from api.dashscope_client import DashscopeClient
from api.rag import RAG

# Configure logging
from api.logging_config import setup_logging

# Run the project's logging setup first, then create this module's logger.
setup_logging()
logger = logging.getLogger(__name__)


# Models for the API
class ChatMessage(BaseModel):
    # One entry of the conversation carried in ChatCompletionRequest.messages.
    role: str  # 'user' or 'assistant'
    content: str  # Text of the message

class ChatCompletionRequest(BaseModel):
    """
    Model for requesting a chat completion.
    """
    # Repository to query, the conversation so far, and optional file/auth context.
    repo_url: str = Field(..., description="URL of the repository to query")
    messages: List[ChatMessage] = Field(..., description="List of chat messages")
    filePath: Optional[str] = Field(None, description="Optional path to a file in the repository to include in the prompt")
    token: Optional[str] = Field(None, description="Personal access token for private repositories")
    type: Optional[str] = Field("github", description="Type of repository (e.g., 'github', 'gitlab', 'bitbucket')")

    # model parameters
    provider: str = Field(
        "google",
        description="Model provider (google, openai, openrouter, ollama, bedrock, azure, dashscope)",
    )
    model: Optional[str] = Field(None, description="Model name for the specified provider")

    language: Optional[str] = Field("en", description="Language for content generation (e.g., 'en', 'ja', 'zh', 'es', 'kr', 'vi')")

    # File filters. handle_websocket_chat splits these values on newlines and
    # URL-decodes each entry, so the descriptions document that wire format
    # (they previously claimed a comma-separated list, which the handler
    # would treat as a single entry).
    # NOTE(review): the included_files parsing lies outside the reviewed span and
    # is assumed to mirror the other three filters — confirm.
    excluded_dirs: Optional[str] = Field(None, description="Newline-separated list of URL-encoded directories to exclude from processing")
    excluded_files: Optional[str] = Field(None, description="Newline-separated list of URL-encoded file patterns to exclude from processing")
    included_dirs: Optional[str] = Field(None, description="Newline-separated list of URL-encoded directories to include exclusively")
    included_files: Optional[str] = Field(None, description="Newline-separated list of URL-encoded file patterns to include exclusively")

async def handle_websocket_chat(websocket: WebSocket):
    """
    Handle WebSocket connection for chat completions.
    This replaces the HTTP streaming endpoint with a WebSocket connection.
    """
    await websocket.accept()

    try:
        # Receive and parse the request data
        request_data = await websocket.receive_json()
        request = ChatCompletionRequest(**request_data)

        # Check if request contains very large input
        input_too_large = False
        if request.messages and len(request.messages) > 0:
            last_message = request.messages[-1]
            if hasattr(last_message, 'content') and last_message.content:
                tokens = count_tokens(last_message.content, request.provider == "ollama")
                logger.info(f"Request size: {tokens} tokens")
                if tokens > 8000:
                    logger.warning(f"Request exceeds recommended token limit ({tokens} > 7500)")
                    input_too_large = True

        # Create a new RAG instance for this request
        try:
            request_rag = RAG(provider=request.provider, model=request.model)

            # Extract custom file filter parameters if provided
            excluded_dirs = None
            excluded_files = None
            included_dirs = None
            included_files = None

            if request.excluded_dirs:
                excluded_dirs = [unquote(dir_path) for dir_path in request.excluded_dirs.split('\n') if dir_path.strip()]
                logger.info(f"Using custom excluded directories: {excluded_dirs}")
            if request.excluded_files:
                excluded_files = [unquote(file_pattern) for file_pattern in request.excluded_files.split('\n') if file_pattern.strip()]
                logger.info(f"Using custom excluded files: {excluded_files}")
            if request.included_dirs:
                included_dirs = [unquote(dir_path) for dir_path in request.included_dirs.split('\n') if dir_path.strip()]
                logger.info(f"Using custom included directories: {included_dirs}")
            if request.included_files:
                included_files = [unquote(file_pattern) for file_pattern in request.included_files.split('\n') if file_pattern.strip()]
                logger.info(f"Using custom included files: {included_files}")

            request_rag.prepare_retriever(request.repo_url, request.type, request.token, excluded_dirs, excluded_files, included_dirs, included_files)
            logger.info(f"Retriever prepared for {request.repo_url}")
        except ValueError as e:
            if "No valid documents with embeddings found" in str(e):
                logger.error(f"No valid embeddings found: {str(e)}")
                await websocket.send_text("Error: No valid document embeddings found. This may be due to embedding size inconsistencies or API errors during document processing. Please try again or check your repository content.")
                await websocket.close()
                return
            else:
                logger.error(f"ValueError preparing retriever: {str(e)}")
                await websocket.send_text(f"Error preparing retriever: {str(e)}")
                await websocket.close()
                return
        except Exception as e:
            logger.error(f"Error preparing retriever: {str(e)}")
            # Check for specific embedding-related errors
            if "All embeddings should be of the same size" in str(e):
                await websocket.send_text("Error: Inconsistent embedding sizes detected. Some documents may have failed to embed properly. Please try again.")
            else:
                await websocket.send_text(f"Error preparing retriever: {str(e)}")
            await websocket.close()
            return

        # Validate request
        if not request.messages or len(request.messages) == 0:
            await websocket.send_text("Error: No messages provided")
            await websocket.close()
            return

        last_message = request.messages[-1]
        if last_message.role != "user":
            await websocket.send_text("Error: Last message must be from the user")
            await websocket.close()
            return

        # Process previous messages to build conversation history
        for i in range(0, len(request.messages) - 1, 2):
            if i + 1 < len(request.messages):
                user_msg = request.messages[i]
                assistant_msg = request.messages[i + 1]

                if user_msg.role == "user" and assistant_msg.role == "assistant":
                    request_rag.memory.add_dialog_turn(
                        user_query=user_msg.content,
                        assistant_response=assistant_msg.content
                    )

        # Check if this is a Deep Research request
        is_deep_research = False
        research_iteration = 1

        # Process messages to detect Deep Research requests
        for msg in request.messages:
            if hasattr(msg, 'content') and msg.content and "[DEEP RESEARCH]" in msg.content:
                is_deep_research = True
                # Only remove the tag from the last message
                if msg == request.messages[-1]:
                    # Remove the Deep Research tag
                    msg.content = msg.content.replace("[DEEP RESEARCH]", "").strip()

        # Count research iterations if this is a Deep Research request
        if is_deep_research:
            research_iteration = sum(1 for msg in request.messages if msg.role == 'assistant') + 1
            logger.info(f"Deep Research request detected - iteration {research_iteration}")

            # Check if this is a continuation request
            if "continue" in last_message.content.lower() and "research" in last_message.content.lower():
                # Find the original topic from the first user message
                original_topic = None
                for msg in request.messages:
                    if msg.role == "user" and "continue" not in msg.content.lower():
                        original_topic = msg.content.replace("[DEEP RESEARCH]", "").strip()
                        logger.info(f"Found original research topic: {original_topic}")
                        break

                if original_topic:
                    # Replace the continuation message with the original topic
                    last_message.content = original_topic
                    logger.info(f"Using original topic for research: {original_topic}")

        # Get the query from the last message
        query = last_message.content

        # Only retrieve documents if input is not too large
        context_text = ""
        retrieved_documents = None

        if not input_too_large:
            try:
                # If filePath exists, modify the query for RAG to focus on the file
                rag_query = query
                if request.filePath:
                    # Use the file path to get relevant context about the file
                    rag_query = f"Contexts related to {request.filePath}"
                    logger.info(f"Modified RAG query to focus on file: {request.filePath}")

                # Try to perform RAG retrieval
                try:
                    # This will use the actual RAG implementation
                    retrieved_documents = request_rag(rag_query, language=request.language)

                    if retrieved_documents and retrieved_documents[0].documents:
                        # Format context for the prompt in a more structured way
                        documents = retrieved_documents[0].documents
                        logger.info(f"Retrieved {len(documents)} documents")

                        # Group documents by file path
                        docs_by_file = {}
                        for doc in documents:
                            file_path = doc.meta_data.get('file_path', 'unknown')
                            if file_path not in docs_by_file:
                                docs_by_file[file_path] = []
                            docs_by_file[file_path].append(doc)

                        # Format context text with file path grouping
                        context_parts = []
                        for file_path, docs in docs_by_file.items():
                            # Add file header with metadata
                            header = f"## File Path: {file_path}\n\n"
                            # Add document content
                            content = "\n\n".join([doc.text for doc in docs])

                            context_parts.append(f"{header}{content}")

                        # Join all parts with clear separation
                        context_text = "\n\n" + "-" * 10 + "\n\n".join(context_parts)
                    else:
                        logger.warning("No documents retrieved from RAG")
                except Exception as e:
                    logger.error(f"Error in RAG retrieval: {str(e)}")
                    # Continue without RAG if there's an error

            except Exception as e:
                logger.error(f"Error retrieving documents: {str(e)}")
                context_text = ""

        # Get repository information
        repo_url = request.repo_url
        repo_name = repo_url.split("/")[-1] if "/" in repo_url else repo_url

        # Determine repository type
        repo_type = request.type

        # Get language information
        language_code = request.language or configs["lang_config"]["default"]
        supported_langs = configs["lang_config"]["supported_languages"]
        language_name = supported_langs.get(language_code, "English")

        # Create system prompt
        if is_deep_research:
            # Check if this is the first iteration
            is_first_iteration = research_iteration == 1

            # Check if this is the final iteration
            is_final_iteration = research_iteration >= 5

            if is_first_iteration:
                system_prompt = f"""<role>
You are an expert code analyst examining the {repo_type} repository: {repo_url} ({repo_name}).
You are conducting a multi-turn Deep Research process to thoroughly investigate the specific topic in the user's query.
Your goal is to provide detailed, focused information EXCLUSIVELY about this topic.
IMPORTANT:You MUST respond in {language_name} language.
</role>

<guidelines>
- This is the first iteration of a multi-turn research process focused EXCLUSIVELY on the user's query
- Start your response with "## Research Plan"
- Outline your approach to investigating this specific topic
- If the topic is about a specific file or feature (like "Dockerfile"), focus ONLY on that file or feature
- Clearly state the specific topic you're researching to maintain focus throughout all iterations
- Identify the key aspects you'll need to research
- Provide initial findings based on the information available
- End with "## Next Steps" indicating what you'll investigate in the next iteration
- Do NOT provide a final conclusion yet - this is just the beginning of the research
- Do NOT include general repository information unless directly relevant to the query
- Focus EXCLUSIVELY on the specific topic being researched - do not drift to related topics
- Your research MUST directly address the original question
- NEVER respond with just "Continue the research" as an answer - always provide substantive research findings
- Remember that this topic will be maintained across all research iterations
</guidelines>

<style>
- Be concise but thorough
- Use markdown formatting to improve readability
- Cite specific files and code sections when relevant
</style>"""
            elif is_final_iteration:
                system_prompt = f"""<role>
You are an expert code analyst examining the {repo_type} repository: {repo_url} ({repo_name}).
You are in the final iteration of a Deep Research process focused EXCLUSIVELY on the latest user query.
Your goal is to synthesize all previous findings and provide a comprehensive conclusion that directly addresses this specific topic and ONLY this topic.
IMPORTANT:You MUST respond in {language_name} language.
</role>

<guidelines>
- This is the final iteration of the research process
- CAREFULLY review the entire conversation history to understand all previous findings
- Synthesize ALL findings from previous iterations into a comprehensive conclusion
- Start with "## Final Conclusion"
- Your conclusion MUST directly address the original question
- Stay STRICTLY focused on the specific topic - do not drift to related topics
- Include specific code references and implementation details related to the topic
- Highlight the most important discoveries and insights about this specific functionality
- Provide a complete and definitive answer to the original question
- Do NOT include general repository information unless directly relevant to the query
- Focus exclusively on the specific topic being researched
- NEVER respond with "Continue the research" as an answer - always provide a complete conclusion
- If the topic is about a specific file or feature (like "Dockerfile"), focus ONLY on that file or feature
- Ensure your conclusion builds on and references key findings from previous iterations
</guidelines>

<style>
- Be concise but thorough
- Use markdown formatting to improve readability
- Cite specific files and code sections when relevant
- Structure your response with clear headings
- End with actionable insights or recommendations when appropriate
</style>"""
            else:
                system_prompt = f"""<role>
You are an expert code analyst examining the {repo_type} repository: {repo_url} ({repo_name}).
You are currently in iteration {research_iteration} of a Deep Research process focused EXCLUSIVELY on the latest user query.
Your goal is to build upon previous research iterations and go deeper into this specific topic without deviating from it.
IMPORTANT:You MUST respond in {language_name} language.
</role>

<guidelines>
- CAREFULLY review the conversation history to understand what has been researched so far
- Your response MUST build on previous research iterations - do not repeat information already covered
- Identify gaps or areas that need further exploration related to this specific topic
- Focus on one specific aspect that needs deeper investigation in this iteration
- Start your response with "## Research Update {research_iteration}"
- Clearly explain what you're investigating in this iteration
- Provide new insights that weren't covered in previous iterations
- If this is iteration 3, prepare for a final conclusion in the next iteration
- Do NOT include general repository information unless directly relevant to the query
- Focus EXCLUSIVELY on the specific topic being researched - do not drift to related topics
- If the topic is about a specific file or feature (like "Dockerfile"), focus ONLY on that file or feature
- NEVER respond with just "Continue the research" as an answer - always provide substantive research findings
- Your research MUST directly address the original question
- Maintain continuity with previous research iterations - this is a continuous investigation
</guidelines>

<style>
- Be concise but thorough
- Focus on providing new information, not repeating what's already been covered
- Use markdown formatting to improve readability
- Cite specific files and code sections when relevant
</style>"""
        else:
            system_prompt = f"""<role>
You are an expert code analyst examining the {repo_type} repository: {repo_url} ({repo_name}).
You provide direct, concise, and accurate information about code repositories.
You NEVER start responses with markdown headers or code fences.
IMPORTANT:You MUST respond in {language_name} language.
</role>

<guidelines>
- Answer the user's question directly without ANY preamble or filler phrases
- DO NOT include any rationale, explanation, or extra comments.
- Strictly base answers ONLY on existing code or documents
- DO NOT speculate or invent citations.
- DO NOT start with preambles like "Okay, here's a breakdown" or "Here's an explanation"
- DO NOT start with markdown headers like "## Analysis of..." or any file path references
- DO NOT start with ```markdown code fences
- DO NOT end your response with ``` closing fences
- DO NOT start by repeating or acknowledging the question
- JUST START with the direct answer to the question

<example_of_what_not_to_do>
```markdown
## Analysis of `adalflow/adalflow/datasets/gsm8k.py`

This file contains...
```
</example_of_what_not_to_do>

- Format your response with proper markdown including headings, lists, and code blocks WITHIN your answer
- For code analysis, organize your response with clear sections
- Think step by step and structure your answer logically
- Start with the most relevant information that directly addresses the user's query
- Be precise and technical when discussing code
- Your response language should be in the same language as the user's query
</guidelines>

<style>
- Use concise, direct language
- Prioritize accuracy over verbosity
- When showing code, include line numbers and file paths when relevant
- Use markdown formatting to improve readability
</style>"""

        # Fetch file content if provided
        file_content = ""
        if request.filePath:
            try:
                file_content = get_file_content(request.repo_url, request.filePath, request.type, request.token)
                logger.info(f"Successfully retrieved content for file: {request.filePath}")
            except Exception as e:
                logger.error(f"Error retrieving file content: {str(e)}")
                # Continue without file content if there's an error

        # Format conversation history
        conversation_history = ""
        for turn_id, turn in request_rag.memory().items():
            if not isinstance(turn_id, int) and hasattr(turn, 'user_query') and hasattr(turn, 'assistant_response'):
                conversation_history += f"<turn>\n<user>{turn.user_query.query_str}</user>\n<assistant>{turn.assistant_response.response_str}</assistant>\n</turn>\n"

        # Create the prompt with context
        prompt = f"/no_think {system_prompt}\n\n"

        if conversation_history:
            prompt += f"<conversation_history>\n{conversation_history}</conversation_history>\n\n"

        # Check if filePath is provided and fetch file content if it exists
        if file_content:
            # Add file content to the prompt after conversation history
            prompt += f"<currentFileContent path=\"{request.filePath}\">\n{file_content}\n</currentFileContent>\n\n"

        # Only include context if it's not empty
        CONTEXT_START = "<START_OF_CONTEXT>"
        CONTEXT_END = "<END_OF_CONTEXT>"
        if context_text.strip():
            prompt += f"{CONTEXT_START}\n{context_text}\n{CONTEXT_END}\n\n"
        else:
            # Add a note that we're skipping RAG due to size constraints or because it's the isolated API
            logger.info("No context available from RAG")
            prompt += "<note>Answering without retrieval augmentation.</note>\n\n"

        prompt += f"<query>\n{query}\n</query>\n\nAssistant: "

        model_config = get_model_config(request.provider, request.model)["model_kwargs"]

        if request.provider == "ollama":
            prompt += " /no_think"

            model = OllamaClient()
            model_kwargs = {
                "model": model_config["model"],
                "stream": True,
                "options": {
                    "temperature": model_config["temperature"],
                    "top_p": model_config["top_p"],
                    "num_ctx": model_config["num_ctx"]
                }
            }

            api_kwargs = model.convert_inputs_to_api_kwargs(
                input=prompt,
                model_kwargs=model_kwargs,
                model_type=ModelType.LLM
            )
        elif request.provider == "openrouter":
            logger.info(f"Using OpenRouter with model: {request.model}")

            # Check if OpenRouter API key is set
            if not OPENROUTER_API_KEY:
                logger.warning("OPENROUTER_API_KEY not configured, but continuing with request")
                # We'll let the OpenRouterClient handle this and return a friendly error message

            model = OpenRouterClient()
            model_kwargs = {
                "model": request.model,
                "stream": True,
                "temperature": model_config["temperature"]
            }
            # Only add top_p if it exists in the model config
            if "top_p" in model_config:
                model_kwargs["top_p"] = model_config["top_p"]

            api_kwargs = model.convert_inputs_to_api_kwargs(
                input=prompt,
                model_kwargs=model_kwargs,
                model_type=ModelType.LLM
            )
        elif request.provider == "openai":
            logger.info(f"Using Openai protocol with model: {request.model}")

            # Check if an API key is set for Openai
            if not OPENAI_API_KEY:
                logger.warning("OPENAI_API_KEY not configured, but continuing with request")
                # We'll let the OpenAIClient handle this and return an error message

            # Initialize Openai client
            model = OpenAIClient()
            model_kwargs = {
                "model": request.model,
                "stream": True,
                "temperature": model_config["temperature"]
            }
            # Only add top_p if it exists in the model config
            if "top_p" in model_config:
                model_kwargs["top_p"] = model_config["top_p"]

            api_kwargs = model.convert_inputs_to_api_kwargs(
                input=prompt,
                model_kwargs=model_kwargs,
                model_type=ModelType.LLM
            )
        elif request.provider == "bedrock":
            logger.info(f"Using AWS Bedrock with model: {request.model}")

            if not AWS_ACCESS_KEY_ID or not AWS_SECRET_ACCESS_KEY:
                logger.warning(
                    "AWS_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY not configured, but continuing with request")

            model = BedrockClient()
            model_kwargs = {
                "model": request.model,
            }

            for key in ["temperature", "top_p"]:
                if key in model_config:
                    model_kwargs[key] = model_config[key]

            api_kwargs = model.convert_inputs_to_api_kwargs(
                input=prompt,
                model_kwargs=model_kwargs,
                model_type=ModelType.LLM
            )
        elif request.provider == "azure":
            logger.info(f"Using Azure AI with model: {request.model}")

            # Initialize Azure AI client
            model = AzureAIClient()
            model_kwargs = {
                "model": request.model,
                "stream": True,
                "temperature": model_config["temperature"],
                "top_p": model_config["top_p"]
            }

            api_kwargs = model.convert_inputs_to_api_kwargs(
                input=prompt,
                model_kwargs=model_kwargs,
                model_type=ModelType.LLM
            )
        elif request.provider == "dashscope":
            logger.info(f"Using Dashscope with model: {request.model}")

            # Initialize Dashscope client
            model = DashscopeClient()
            model_kwargs = {
                "model": request.model,
                "stream": True,
                "temperature": model_config["temperature"],
                "top_p": model_config["top_p"]
            }

            api_kwargs = model.convert_inputs_to_api_kwargs(
                input=prompt,
                model_kwargs=model_kwargs,
                model_type=ModelType.LLM
            )
        else:
            # Initialize Google Generative AI model
            model = genai.GenerativeModel(
                model_name=model_config["model"],
                generation_config={
                    "temperature": model_config["temperature"],
                    "top_p": model_config["top_p"],
                    "top_k": model_config["top_k"]
                }
            )

        # Process the response based on the provider
        try:
            if request.provider == "ollama":
                # Get the response and handle it properly using the previously created api_kwargs
                response = await model.acall(api_kwargs=api_kwargs, model_type=ModelType.LLM)
                # Handle streaming response from Ollama
                async for chunk in response:
                    text = None
                    if isinstance(chunk, dict):
                        text = chunk.get("message", {}).get("content") if isinstance(chunk.get("message"), dict) else chunk.get("message")
                    else:
                        message = getattr(chunk, "message", None)
                        if message is not None:
                            if isinstance(message, dict):
                                text = message.get("content")
                            else:
                                text = getattr(message, "content", None)

                    if not text:
                        text = getattr(chunk, 'response', None) or getattr(chunk, 'text', None)

                    if not text and hasattr(chunk, "__dict__"):
                        message = chunk.__dict__.get("message")
                        if isinstance(message, dict):
                            text = message.get("content")

                    if isinstance(text, str) and text and not text.startswith('model=') and not text.startswith('created_at='):
                        clean_text = text.replace('<think>', '').replace('</think>', '')
                        await websocket.send_text(clean_text)
                # Explicitly close the WebSocket connection after the response is complete
                await websocket.close()
            elif request.provider == "openrouter":
                try:
                    # Get the response and handle it properly using the previously created api_kwargs
                    logger.info("Making OpenRouter API call")
                    response = await model.acall(api_kwargs=api_kwargs, model_type=ModelType.LLM)
                    # Handle streaming response from OpenRouter
                    async for chunk in response:
                        await websocket.send_text(chunk)
                    # Explicitly close the WebSocket connection after the response is complete
                    await websocket.close()
                except Exception as e_openrouter:
                    logger.error(f"Error with OpenRouter API: {str(e_openrouter)}")
                    error_msg = f"\nError with OpenRouter API: {str(e_openrouter)}\n\nPlease check that you have set the OPENROUTER_API_KEY environment variable with a valid API key."
                    await websocket.send_text(error_msg)
                    # Close the WebSocket connection after sending the error message
                    await websocket.close()
            elif request.provider == "openai":
                try:
                    # Get the response and handle it properly using the previously created api_kwargs
                    logger.info("Making Openai API call")
                    response = await model.acall(api_kwargs=api_kwargs, model_type=ModelType.LLM)
                    # Handle streaming response from Openai
                    async for chunk in response:
                        choices = getattr(chunk, "choices", [])
                        if len(choices) > 0:
                            delta = getattr(choices[0], "delta", None)
                            if delta is not None:
                                text = getattr(delta, "content", None)
                                if text is not None:
                                    await websocket.send_text(text)
                    # Explicitly close the WebSocket connection after the response is complete
                    await websocket.close()
                except Exception as e_openai:
                    logger.error(f"Error with Openai API: {str(e_openai)}")
                    error_msg = f"\nError with Openai API: {str(e_openai)}\n\nPlease check that you have set the OPENAI_API_KEY environment variable with a valid API key."
                    await websocket.send_text(error_msg)
                    # Close the WebSocket connection after sending the error message
                    await websocket.close()
            elif request.provider == "bedrock":
                try:
                    logger.info("Making AWS Bedrock API call")
                    response = await model.acall(api_kwargs=api_kwargs, model_type=ModelType.LLM)
                    if isinstance(response, str):
                        await websocket.send_text(response)
                    else:
                        await websocket.send_text(str(response))
                    await websocket.close()
                except Exception as e_bedrock:
                    logger.error(f"Error with AWS Bedrock API: {str(e_bedrock)}")
                    error_msg = (
                        f"\nError with AWS Bedrock API: {str(e_bedrock)}\n\n"
                        "Please check that you have set the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY "
                        "environment variables with valid credentials."
                    )
                    await websocket.send_text(error_msg)
                    await websocket.close()
            elif request.provider == "azure":
                try:
                    # Get the response and handle it properly using the previously created api_kwargs
                    logger.info("Making Azure AI API call")
                    response = await model.acall(api_kwargs=api_kwargs, model_type=ModelType.LLM)
                    # Handle streaming response from Azure AI
                    async for chunk in response:
                        choices = getattr(chunk, "choices", [])
                        if len(choices) > 0:
                            delta = getattr(choices[0], "delta", None)
                            if delta is not None:
                                text = getattr(delta, "content", None)
                                if text is not None:
                                    await websocket.send_text(text)
                    # Explicitly close the WebSocket connection after the response is complete
                    await websocket.close()
                except Exception as e_azure:
                    logger.error(f"Error with Azure AI API: {str(e_azure)}")
                    error_msg = f"\nError with Azure AI API: {str(e_azure)}\n\nPlease check that you have set the AZURE_OPENAI_API_KEY, AZURE_OPENAI_ENDPOINT, and AZURE_OPENAI_VERSION environment variables with valid values."
                    await websocket.send_text(error_msg)
                    # Close the WebSocket connection after sending the error message
                    await websocket.close()
            elif request.provider == "dashscope":
                try:
                    # Get the response and handle it properly using the previously created api_kwargs
                    logger.info("Making Dashscope API call")
                    response = await model.acall(
                        api_kwargs=api_kwargs, model_type=ModelType.LLM
                    )
                    # DashscopeClient.acall with stream=True returns an async
                    # generator of plain text chunks
                    async for text in response:
                        if text:
                            await websocket.send_text(text)
                    # Explicitly close the WebSocket connection after the response is complete
                    await websocket.close()
                except Exception as e_dashscope:
                    logger.error(f"Error with Dashscope API: {str(e_dashscope)}")
                    error_msg = (
                        f"\nError with Dashscope API: {str(e_dashscope)}\n\n"
                        "Please check that you have set the DASHSCOPE_API_KEY (and optionally "
                        "DASHSCOPE_WORKSPACE_ID) environment variables with valid values."
                    )
                    await websocket.send_text(error_msg)
                    # Close the WebSocket connection after sending the error message
                    await websocket.close()
            else:
                # Google Generative AI (default provider)
                response = model.generate_content(prompt, stream=True)
                for chunk in response:
                    if hasattr(chunk, 'text'):
                        await websocket.send_text(chunk.text)
                await websocket.close()

        except Exception as e_outer:
            logger.error(f"Error in streaming response: {str(e_outer)}")
            error_message = str(e_outer)

            # Check for token limit errors
            if "maximum context length" in error_message or "token limit" in error_message or "too many tokens" in error_message:
                # If we hit a token limit error, try again without context
                logger.warning("Token limit exceeded, retrying without context")
                try:
                    # Create a simplified prompt without context
                    simplified_prompt = f"/no_think {system_prompt}\n\n"
                    if conversation_history:
                        simplified_prompt += f"<conversation_history>\n{conversation_history}</conversation_history>\n\n"

                    # Include file content in the fallback prompt if it was retrieved
                    if request.filePath and file_content:
                        simplified_prompt += f"<currentFileContent path=\"{request.filePath}\">\n{file_content}\n</currentFileContent>\n\n"

                    simplified_prompt += "<note>Answering without retrieval augmentation due to input size constraints.</note>\n\n"
                    simplified_prompt += f"<query>\n{query}\n</query>\n\nAssistant: "

                    if request.provider == "ollama":
                        simplified_prompt += " /no_think"

                        # Create new api_kwargs with the simplified prompt
                        fallback_api_kwargs = model.convert_inputs_to_api_kwargs(
                            input=simplified_prompt,
                            model_kwargs=model_kwargs,
                            model_type=ModelType.LLM
                        )

                        # Get the response using the simplified prompt
                        fallback_response = await model.acall(api_kwargs=fallback_api_kwargs, model_type=ModelType.LLM)

                        # Handle streaming fallback_response from Ollama
                        async for chunk in fallback_response:
                            text = getattr(chunk, 'response', None) or getattr(chunk, 'text', None) or str(chunk)
                            if text and not text.startswith('model=') and not text.startswith('created_at='):
                                text = text.replace('<think>', '').replace('</think>', '')
                                await websocket.send_text(text)
                    elif request.provider == "openrouter":
                        try:
                            # Create new api_kwargs with the simplified prompt
                            fallback_api_kwargs = model.convert_inputs_to_api_kwargs(
                                input=simplified_prompt,
                                model_kwargs=model_kwargs,
                                model_type=ModelType.LLM
                            )

                            # Get the response using the simplified prompt
                            logger.info("Making fallback OpenRouter API call")
                            fallback_response = await model.acall(api_kwargs=fallback_api_kwargs, model_type=ModelType.LLM)

                            # Handle streaming fallback_response from OpenRouter
                            async for chunk in fallback_response:
                                await websocket.send_text(chunk)
                        except Exception as e_fallback:
                            logger.error(f"Error with OpenRouter API fallback: {str(e_fallback)}")
                            error_msg = f"\nError with OpenRouter API fallback: {str(e_fallback)}\n\nPlease check that you have set the OPENROUTER_API_KEY environment variable with a valid API key."
                            await websocket.send_text(error_msg)
                    elif request.provider == "openai":
                        try:
                            # Create new api_kwargs with the simplified prompt
                            fallback_api_kwargs = model.convert_inputs_to_api_kwargs(
                                input=simplified_prompt,
                                model_kwargs=model_kwargs,
                                model_type=ModelType.LLM
                            )

                            # Get the response using the simplified prompt
                            logger.info("Making fallback Openai API call")
                            fallback_response = await model.acall(api_kwargs=fallback_api_kwargs, model_type=ModelType.LLM)

                            # Handle streaming fallback_response from Openai
                            async for chunk in fallback_response:
                                text = chunk if isinstance(chunk, str) else getattr(chunk, 'text', str(chunk))
                                await websocket.send_text(text)
                        except Exception as e_fallback:
                            logger.error(f"Error with Openai API fallback: {str(e_fallback)}")
                            error_msg = f"\nError with Openai API fallback: {str(e_fallback)}\n\nPlease check that you have set the OPENAI_API_KEY environment variable with a valid API key."
                            await websocket.send_text(error_msg)
                    elif request.provider == "bedrock":
                        try:
                            fallback_api_kwargs = model.convert_inputs_to_api_kwargs(
                                input=simplified_prompt,
                                model_kwargs=model_kwargs,
                                model_type=ModelType.LLM,
                            )

                            logger.info("Making fallback AWS Bedrock API call")
                            fallback_response = await model.acall(
                                api_kwargs=fallback_api_kwargs, model_type=ModelType.LLM
                            )

                            if isinstance(fallback_response, str):
                                await websocket.send_text(fallback_response)
                            else:
                                await websocket.send_text(str(fallback_response))
                        except Exception as e_fallback:
                            logger.error(
                                f"Error with AWS Bedrock API fallback: {str(e_fallback)}"
                            )
                            error_msg = (
                                f"\nError with AWS Bedrock API fallback: {str(e_fallback)}\n\n"
                                "Please check that you have set the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY "
                                "environment variables with valid credentials."
                            )
                            await websocket.send_text(error_msg)
                    elif request.provider == "azure":
                        try:
                            # Create new api_kwargs with the simplified prompt
                            fallback_api_kwargs = model.convert_inputs_to_api_kwargs(
                                input=simplified_prompt,
                                model_kwargs=model_kwargs,
                                model_type=ModelType.LLM
                            )

                            # Get the response using the simplified prompt
                            logger.info("Making fallback Azure AI API call")
                            fallback_response = await model.acall(api_kwargs=fallback_api_kwargs, model_type=ModelType.LLM)

                            # Handle streaming fallback response from Azure AI
                            async for chunk in fallback_response:
                                choices = getattr(chunk, "choices", [])
                                if len(choices) > 0:
                                    delta = getattr(choices[0], "delta", None)
                                    if delta is not None:
                                        text = getattr(delta, "content", None)
                                        if text is not None:
                                            await websocket.send_text(text)
                        except Exception as e_fallback:
                            logger.error(f"Error with Azure AI API fallback: {str(e_fallback)}")
                            error_msg = f"\nError with Azure AI API fallback: {str(e_fallback)}\n\nPlease check that you have set the AZURE_OPENAI_API_KEY, AZURE_OPENAI_ENDPOINT, and AZURE_OPENAI_VERSION environment variables with valid values."
                            await websocket.send_text(error_msg)
                    elif request.provider == "dashscope":
                        try:
                            # Create new api_kwargs with the simplified prompt
                            fallback_api_kwargs = model.convert_inputs_to_api_kwargs(
                                input=simplified_prompt,
                                model_kwargs=model_kwargs,
                                model_type=ModelType.LLM,
                            )

                            logger.info("Making fallback Dashscope API call")
                            fallback_response = await model.acall(
                                api_kwargs=fallback_api_kwargs, model_type=ModelType.LLM
                            )

                            # DashscopeClient.acall (stream=True) returns an async
                            # generator of text chunks
                            async for text in fallback_response:
                                if text:
                                    await websocket.send_text(text)
                        except Exception as e_fallback:
                            logger.error(
                                f"Error with Dashscope API fallback: {str(e_fallback)}"
                            )
                            error_msg = (
                                f"\nError with Dashscope API fallback: {str(e_fallback)}\n\n"
                                "Please check that you have set the DASHSCOPE_API_KEY (and optionally "
                                "DASHSCOPE_WORKSPACE_ID) environment variables with valid values."
                            )
                            await websocket.send_text(error_msg)
                    else:
                        # Google Generative AI fallback (default provider)
                        model_config = get_model_config(request.provider, request.model)
                        fallback_model = genai.GenerativeModel(
                            model_name=model_config["model_kwargs"]["model"],
                            generation_config={
                                "temperature": model_config["model_kwargs"].get("temperature", 0.7),
                                "top_p": model_config["model_kwargs"].get("top_p", 0.8),
                                "top_k": model_config["model_kwargs"].get("top_k", 40),
                            },
                        )

                        fallback_response = fallback_model.generate_content(
                            simplified_prompt, stream=True
                        )
                        for chunk in fallback_response:
                            if hasattr(chunk, "text"):
                                await websocket.send_text(chunk.text)
                except Exception as e2:
                    logger.error(f"Error in fallback streaming response: {str(e2)}")
                    await websocket.send_text(f"\nI apologize, but your request is too large for me to process. Please try a shorter query or break it into smaller parts.")
                    # Close the WebSocket connection after sending the error message
                    await websocket.close()
            else:
                # For other errors, return the error message
                await websocket.send_text(f"\nError: {error_message}")
                # Close the WebSocket connection after sending the error message
                await websocket.close()

    except WebSocketDisconnect:
        logger.info("WebSocket disconnected")
    except Exception as e:
        logger.error(f"Error in WebSocket handler: {str(e)}")
        try:
            await websocket.send_text(f"Error: {str(e)}")
            await websocket.close()
        except Exception:
            pass


================================================
FILE: docker-compose.yml
================================================
services:
  deepwiki:
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      # API: host and container port both follow $PORT (default 8001)
      - "${PORT:-8001}:${PORT:-8001}"
      # Next.js
      - "3000:3000"
    env_file:
      - .env
    environment:
      PORT: "${PORT:-8001}"
      NODE_ENV: "production"
      SERVER_BASE_URL: "http://localhost:${PORT:-8001}"
      LOG_LEVEL: "${LOG_LEVEL:-INFO}"
      LOG_FILE_PATH: "${LOG_FILE_PATH:-api/logs/application.log}"
    volumes:
      # Keep repository and embedding data outside the container
      - ~/.adalflow:/root/.adalflow
      # Keep log files across container restarts
      - ./api/logs:/app/api/logs
    # Memory limits used by `docker-compose up` (not Swarm mode)
    mem_limit: 6g
    mem_reservation: 2g
    # Poll the /health endpoint on the API port with curl
    healthcheck:
      test:
        - "CMD"
        - "curl"
        - "-f"
        - "http://localhost:${PORT:-8001}/health"
      interval: 60s
      timeout: 10s
      retries: 3
      start_period: 30s


================================================
FILE: eslint.config.mjs
================================================
import { dirname } from "path";
import { fileURLToPath } from "url";
import { FlatCompat } from "@eslint/eslintrc";

// ES modules have no built-in __dirname, so derive this file's directory
// from the module URL and pass it to FlatCompat as its base directory.
const configDir = dirname(fileURLToPath(import.meta.url));

const legacyBridge = new FlatCompat({ baseDirectory: configDir });

// The exported flat config consists solely of the two Next.js presets,
// loaded through the compatibility layer.
export default [
  ...legacyBridge.extends("next/core-web-vitals", "next/typescript"),
];


================================================
FILE: next.config.ts
================================================
import type { NextConfig } from "next";

// Origin that proxied requests are forwarded to; overridable via SERVER_BASE_URL.
const TARGET_SERVER_BASE_URL = process.env.SERVER_BASE_URL || 'http://localhost:8001';

// [path matched by Next.js, path requested on the target server].
// The rewrite list is generated from this table in the order written here.
const PROXIED_ROUTES: ReadonlyArray<readonly [string, string]> = [
  ['/api/wiki_cache/:path*', '/api/wiki_cache/:path*'],
  ['/export/wiki/:path*', '/export/wiki/:path*'],
  ['/api/wiki_cache', '/api/wiki_cache'],
  ['/local_repo/structure', '/local_repo/structure'],
  ['/api/auth/status', '/auth/status'],
  ['/api/auth/validate', '/auth/validate'],
  ['/api/lang/config', '/lang/config'],
];

const nextConfig: NextConfig = {
  output: 'standalone',
  experimental: {
    optimizePackageImports: ['@mermaid-js/mermaid', 'react-syntax-highlighter'],
  },
  webpack: (config, { isServer }) => {
    // Client bundles must not try to resolve Node's `fs` module.
    if (!isServer) {
      config.resolve.fallback = { ...config.resolve.fallback, fs: false };
    }

    // Put everything under node_modules into a single "vendors" chunk.
    const vendorGroup = {
      test: /[\\/]node_modules[\\/]/,
      name: 'vendors',
      chunks: 'all',
    };
    config.optimization = {
      ...config.optimization,
      splitChunks: {
        chunks: 'all',
        cacheGroups: { vendor: vendorGroup },
      },
    };

    return config;
  },
  async rewrites() {
    return PROXIED_ROUTES.map(([source, targetPath]) => ({
      source,
      destination: `${TARGET_SERVER_BASE_URL}${targetPath}`,
    }));
  },
};

export default nextConfig;


================================================
FILE: package.json
================================================
{
  "name": "deepwiki-open",
  "version": "0.1.0",
  "private": true,
  "scripts": {
    "dev": "next dev --turbopack --port 3000",
    "build": "next build",
    "start": "next start",
    "lint": "next lint"
  },
  "dependencies": {
    "mermaid": "^11.4.1",
    "next": "15.3.1",
    "next-intl": "^4.1.0",
    "next-themes": "^0.4.6",
    "react": "^19.0.0",
    "react-dom": "^19.0.0",
    "react-icons": "^5.5.0",
    "react-markdown": "^10.1.0",
    "react-syntax-highlighter": "^15.6.1",
    "rehype-raw": "^7.0.0",
    "remark-gfm": "^4.0.1",
    "svg-pan-zoom": "^3.6.2"
  },
  "devDependencies": {
    "@eslint/eslintrc": "^3",
    "@tailwindcss/postcss": "^4",
    "@types/node": "^20",
    "@types/react": "^19",
    "@types/react-dom": "^19",
    "@types/react-syntax-highlighter": "^15.5.13",
    "eslint": "^9",
    "eslint-config-next": "15.3.1",
    "tailwindcss": "^4",
    "typescript": "^5"
  },
  "packageManager": "yarn@1.22.22+sha512.a6b2f7906b721bba3d67d4aff083df04dad64c399707841b7acf00f6b133b7ac24255f2652fa22ae3534329dc6180534e98d17432037ff6fd140556e2bb3137e"
}


================================================
FILE: postcss.config.mjs
================================================
// PostCSS configuration: the Tailwind PostCSS plugin is the only entry.
export default {
  plugins: ["@tailwindcss/postcss"],
};


================================================
FILE: pytest.ini
================================================
# pytest reads pytest.ini settings from a [pytest] section. The [tool:pytest]
# spelling is only recognised in setup.cfg, so under that header none of the
# options below were applied.
[pytest]
# Test discovery: directory, file, class and function naming patterns.
testpaths = test
python_files = test_*.py *_test.py
python_classes = Test*
python_functions = test_*
# Default command-line options; --strict-markers rejects any marker that is
# not registered in the `markers` list below.
addopts =
    -v
    --strict-markers
    --disable-warnings
    --tb=short
markers =
    unit: Unit tests
    integration: Integration tests
    slow: Slow tests that take more than a few seconds
    network: Tests that require network access


================================================
FILE: run.sh
================================================
#!/usr/bin/env sh
# Run the api.main module through uv.
# `exec` replaces this shell with the uv process, and "$@" forwards any extra
# command-line arguments to it.
exec uv run -m api.main "$@"

================================================
FILE: src/app/[owner]/[repo]/page.tsx
================================================
/* eslint-disable @typescript-eslint/no-unused-vars */
'use client';

import Ask from '@/components/Ask';
import Markdown from '@/components/Markdown';
import ModelSelectionModal from '@/components/ModelSelectionModal';
import ThemeToggle from '@/components/theme-toggle';
import WikiTreeView from '@/components/WikiTreeView';
import { useLanguage } from '@/contexts/LanguageContext';
import { RepoInfo } from '@/types/repoinfo';
import getRepoUrl from '@/utils/getRepoUrl';
import { extractUrlDomain, extractUrlPath } from '@/utils/urlDecoder';
import Link from 'next/link';
import { useParams, useSearchParams } from 'next/navigation';
import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react';
import { FaBitbucket, FaBookOpen, FaComments, FaDownload, FaExclamationTriangle, FaFileExport, FaFolder, FaGithub, FaGitlab, FaHome, FaSync, FaTimes } from 'react-icons/fa';
// Define the WikiSection and WikiStructure types directly in this file
// since the imported types don't have the sections and rootSections properties
/**
 * A section entry of the wiki structure: an id and title plus lists of
 * page references and optional subsection references.
 */
interface WikiSection {
  id: string;
  title: string;
  // NOTE(review): presumably WikiPage ids — confirm against the code that builds sections
  pages: string[];
  // NOTE(review): presumably ids of other WikiSection entries — confirm
  subsections?: string[];
}

/**
 * A single wiki page together with its metadata.
 */
interface WikiPage {
  id: string;
  title: string;
  // Page body; the component stores the placeholder 'Loading...' here while
  // content for the page is being generated.
  content: string;
  // NOTE(review): presumably paths of the repository files the page is based on — confirm
  filePaths: string[];
  importance: 'high' | 'medium' | 'low';
  // NOTE(review): presumably ids of other WikiPage entries — confirm
  relatedPages: string[];
  parentId?: string;
  isSection?: boolean;
  children?: string[];
}

/**
 * The whole wiki: descriptive metadata, the list of pages, and the section
 * entries that organise them.
 */
interface WikiStructure {
  id: string;
  title: string;
  description: string;
  pages: WikiPage[];
  sections: WikiSection[];
  // NOTE(review): presumably ids of the top-level WikiSection entries — confirm
  rootSections: string[];
}

// CSS rules (as a string) for elements rendered inside `.prose`: code, pre,
// headings, paragraphs, links, blockquotes, lists and tables. Colours come
// from the --background, --foreground, --border-color, --accent-primary and
// --highlight custom properties.
// NOTE(review): every rule uses Tailwind's `@apply`; confirm that wherever this
// string is injected is actually processed by Tailwind, otherwise the
// directives have no effect in the browser.
const wikiStyles = `
  .prose code {
    @apply bg-[var(--background)]/70 px-1.5 py-0.5 rounded font-mono text-xs border border-[var(--border-color)];
  }

  .prose pre {
    @apply bg-[var(--background)]/80 text-[var(--foreground)] rounded-md p-4 overflow-x-auto border border-[var(--border-color)] shadow-sm;
  }

  .prose h1, .prose h2, .prose h3, .prose h4 {
    @apply font-serif text-[var(--foreground)];
  }

  .prose p {
    @apply text-[var(--foreground)] leading-relaxed;
  }

  .prose a {
    @apply text-[var(--accent-primary)] hover:text-[var(--highlight)] transition-colors no-underline border-b border-[var(--border-color)] hover:border-[var(--accent-primary)];
  }

  .prose blockquote {
    @apply border-l-4 border-[var(--accent-primary)]/30 bg-[var(--background)]/30 pl-4 py-1 italic;
  }

  .prose ul, .prose ol {
    @apply text-[var(--foreground)];
  }

  .prose table {
    @apply border-collapse border border-[var(--border-color)];
  }

  .prose th {
    @apply bg-[var(--background)]/70 text-[var(--foreground)] p-2 border border-[var(--border-color)];
  }

  .prose td {
    @apply p-2 border border-[var(--border-color)];
  }
`;

/**
 * Build the localStorage key under which a generated wiki is cached.
 *
 * The key has the form
 * `deepwiki_cache_<repoType>_<owner>_<repo>_<language>_<comprehensive|concise>`.
 *
 * @param isComprehensive selects the `comprehensive` (default) or `concise` suffix
 */
const getCacheKey = (owner: string, repo: string, repoType: string, language: string, isComprehensive: boolean = true): string => {
  const repoPart = `${repoType}_${owner}_${repo}`;
  const viewPart = isComprehensive ? 'comprehensive' : 'concise';
  return `deepwiki_cache_${repoPart}_${language}_${viewPart}`;
};

/**
 * Copy the access token, model selection, language and file filters onto
 * `requestBody` (mutated in place; nothing is returned).
 *
 * - `token` is written only when it is not the empty string.
 * - `provider`, `model` and `language` are always written.
 * - `custom_model` is written only when `isCustomModel` is true and
 *   `customModel` is non-empty.
 * - each of the four filter values is written only when it is truthy.
 *
 * `repoType` is accepted but not used by this helper.
 */
const addTokensToRequestBody = (
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  requestBody: Record<string, any>,
  token: string,
  repoType: string,
  provider: string = '',
  model: string = '',
  isCustomModel: boolean = false,
  customModel: string = '',
  language: string = 'en',
  excludedDirs?: string,
  excludedFiles?: string,
  includedDirs?: string,
  includedFiles?: string
): void => {
  if (token !== '') requestBody.token = token;

  // Provider-based model selection
  requestBody.provider = provider;
  requestBody.model = model;
  if (isCustomModel && customModel) requestBody.custom_model = customModel;

  requestBody.language = language;

  // Optional file filters: request-body key paired with the supplied value
  const fileFilters: Array<[string, string | undefined]> = [
    ['excluded_dirs', excludedDirs],
    ['excluded_files', excludedFiles],
    ['included_dirs', includedDirs],
    ['included_files', includedFiles],
  ];
  for (const [bodyKey, filterValue] of fileFilters) {
    if (filterValue) {
      requestBody[bodyKey] = filterValue;
    }
  }
};

/**
 * Headers for a GitHub API request: the v3 JSON `Accept` header, plus a
 * `Bearer` Authorization header when `githubToken` is non-empty.
 */
const createGithubHeaders = (githubToken: string): HeadersInit => {
  const auth: Record<string, string> = githubToken
    ? { 'Authorization': `Bearer ${githubToken}` }
    : {};

  return { 'Accept': 'application/vnd.github.v3+json', ...auth };
};

/**
 * Headers for a GitLab API request: a JSON `Content-Type`, plus a
 * `PRIVATE-TOKEN` header when `gitlabToken` is non-empty.
 */
const createGitlabHeaders = (gitlabToken: string): HeadersInit => {
  const auth: Record<string, string> = gitlabToken
    ? { 'PRIVATE-TOKEN': gitlabToken }
    : {};

  return { 'Content-Type': 'application/json', ...auth };
};

/**
 * Headers for a Bitbucket API request: a JSON `Content-Type`, plus a
 * `Bearer` Authorization header when `bitbucketToken` is non-empty.
 */
const createBitbucketHeaders = (bitbucketToken: string): HeadersInit => {
  const auth: Record<string, string> = bitbucketToken
    ? { 'Authorization': `Bearer ${bitbucketToken}` }
    : {};

  return { 'Content-Type': 'application/json', ...auth };
};


export default function RepoWikiPage() {
  // Get route parameters and search params
  const params = useParams();
  const searchParams = useSearchParams();

  // Extract owner and repo from route params
  const owner = params.owner as string;
  const repo = params.repo as string;

  // Extract tokens from search params
  const token = searchParams.get('token') || '';
  const localPath = searchParams.get('local_path') ? decodeURIComponent(searchParams.get('local_path') || '') : undefined;
  const repoUrl = searchParams.get('repo_url') ? decodeURIComponent(searchParams.get('repo_url') || '') : undefined;
  const providerParam = searchParams.get('provider') || '';
  const modelParam = searchParams.get('model') || '';
  const isCustomModelParam = searchParams.get('is_custom_model') === 'true';
  const customModelParam = searchParams.get('custom_model') || '';
  const language = searchParams.get('language') || 'en';
  const repoHost = (() => {
    if (!repoUrl) return '';
    try {
      return new URL(repoUrl).hostname.toLowerCase();
    } catch (e) {
      console.warn(`Invalid repoUrl provided: ${repoUrl}`);
      return '';
    }
  })();
  const repoType = repoHost?.includes('bitbucket')
    ? 'bitbucket'
    : repoHost?.includes('gitlab')
      ? 'gitlab'
      : repoHost?.includes('github')
        ? 'github'
        : searchParams.get('type') || 'github';

  // Import language context for translations
  const { messages } = useLanguage();

  // Initialize repo info
  const repoInfo = useMemo<RepoInfo>(() => ({
    owner,
    repo,
    type: repoType,
    token: token || null,
    localPath: localPath || null,
    repoUrl: repoUrl || null
  }), [owner, repo, repoType, localPath, repoUrl, token]);

  // State variables
  const [isLoading, setIsLoading] = useState(true);
  const [loadingMessage, setLoadingMessage] = useState<string | undefined>(
    messages.loading?.initializing || 'Initializing wiki generation...'
  );
  const [error, setError] = useState<string | null>(null);
  const [wikiStructure, setWikiStructure] = useState<WikiStructure | undefined>();
  const [currentPageId, setCurrentPageId] = useState<string | undefined>();
  const [generatedPages, setGeneratedPages] = useState<Record<string, WikiPage>>({});
  const [pagesInProgress, setPagesInProgress] = useState(new Set<string>());
  const [isExporting, setIsExporting] = useState(false);
  const [exportError, setExportError] = useState<string | null>(null);
  const [originalMarkdown, setOriginalMarkdown] = useState<Record<string, string>>({});
  const [requestInProgress, setRequestInProgress] = useState(false);
  const [currentToken, setCurrentToken] = useState(token); // Track current effective token
  const [effectiveRepoInfo, setEffectiveRepoInfo] = useState(repoInfo); // Track effective repo info with cached data
  const [embeddingError, setEmbeddingError] = useState(false);

  // Model selection state variables
  const [selectedProviderState, setSelectedProviderState] = useState(providerParam);
  const [selectedModelState, setSelectedModelState] = useState(modelParam);
  const [isCustomSelectedModelState, setIsCustomSelectedModelState] = useState(isCustomModelParam);
  const [customSelectedModelState, setCustomSelectedModelState] = useState(customModelParam);
  const [showModelOptions, setShowModelOptions] = useState(false); // Controls whether to show model options
  const excludedDirs = searchParams.get('excluded_dirs') || '';
  const excludedFiles = searchParams.get('excluded_files') || '';
  const [modelExcludedDirs, setModelExcludedDirs] = useState(excludedDirs);
  const [modelExcludedFiles, setModelExcludedFiles] = useState(excludedFiles);
  const includedDirs = searchParams.get('included_dirs') || '';
  const includedFiles = searchParams.get('included_files') || '';
  const [modelIncludedDirs, setModelIncludedDirs] = useState(includedDirs);
  const [modelIncludedFiles, setModelIncludedFiles] = useState(includedFiles);


  // Wiki type state - default to comprehensive view
  const isComprehensiveParam = searchParams.get('comprehensive') !== 'false';
  const [isComprehensiveView, setIsComprehensiveView] = useState(isComprehensiveParam);
  // Using useRef for activeContentRequests to maintain a single instance across renders
  // This map tracks which pages are currently being processed to prevent duplicate requests
  // Note: In a multi-threaded environment, additional synchronization would be needed,
  // but in React's single-threaded model, this is safe as long as we set the flag before any async operations
  const activeContentRequests = useRef(new Map<string, boolean>()).current;
  const [structureRequestInProgress, setStructureRequestInProgress] = useState(false);
  // Create a flag to track if data was loaded from cache to prevent immediate re-save
  const cacheLoadedSuccessfully = useRef(false);

  // Create a flag to ensure the effect only runs once
  const effectRan = React.useRef(false);

  // State for Ask modal
  const [isAskModalOpen, setIsAskModalOpen] = useState(false);
  const askComponentRef = useRef<{ clearConversation: () => void } | null>(null);

  // Authentication state
  const [authRequired, setAuthRequired] = useState<boolean>(false);
  const [authCode, setAuthCode] = useState<string>('');
  const [isAuthLoading, setIsAuthLoading] = useState<boolean>(true);

  // Default branch state
  const [defaultBranch, setDefaultBranch] = useState<string>('main');

  // Helper function to generate proper repository file URLs
  const generateFileUrl = useCallback((filePath: string): string => {
    if (effectiveRepoInfo.type === 'local') {
      // For local repositories, we can't generate web URLs
      return filePath;
    }

    const repoUrl = effectiveRepoInfo.repoUrl;
    if (!repoUrl) {
      return filePath;
    }

    try {
      const url = new URL(repoUrl);
      const hostname = url.hostname;
      
      if (hostname === 'github.com' || hostname.includes('github')) {
        // GitHub URL format: https://github.com/owner/repo/blob/branch/path
        return `${repoUrl}/blob/${defaultBranch}/${filePath}`;
      } else if (hostname === 'gitlab.com' || hostname.includes('gitlab')) {
        // GitLab URL format: https://gitlab.com/owner/repo/-/blob/branch/path
        return `${repoUrl}/-/blob/${defaultBranch}/${filePath}`;
      } else if (hostname === 'bitbucket.org' || hostname.includes('bitbucket')) {
        // Bitbucket URL format: https://bitbucket.org/owner/repo/src/branch/path
        return `${repoUrl}/src/${defaultBranch}/${filePath}`;
      }
    } catch (error) {
      console.warn('Error generating file URL:', error);
    }

    // Fallback to just the file path
    return filePath;
  }, [effectiveRepoInfo, defaultBranch]);

  // Memoize repo info to avoid triggering updates in callbacks

  // Add useEffect to handle scroll reset
  useEffect(() => {
    // Scroll to top when currentPageId changes
    const wikiContent = document.getElementById('wiki-content');
    if (wikiContent) {
      wikiContent.scrollTo({ top: 0, behavior: 'smooth' });
    }
  }, [currentPageId]);

  // close the modal when escape is pressed
  useEffect(() => {
    const handleEsc = (event: KeyboardEvent) => {
      if (event.key === 'Escape') {
        setIsAskModalOpen(false);
      }
    };

    if (isAskModalOpen) {
      window.addEventListener('keydown', handleEsc);
    }

    // Cleanup on unmount or when modal closes
    return () => {
      window.removeEventListener('keydown', handleEsc);
    };
  }, [isAskModalOpen]);

  // Once on mount, ask the backend whether authentication is required
  useEffect(() => {
    (async () => {
      try {
        setIsAuthLoading(true);
        const res = await fetch('/api/auth/status');
        if (!res.ok) {
          throw new Error(`HTTP error! status: ${res.status}`);
        }
        const payload = await res.json();
        setAuthRequired(payload.auth_required);
      } catch (fetchError) {
        console.error("Failed to fetch auth status:", fetchError);
        // Fail closed: when the status cannot be determined, treat auth as required
        setAuthRequired(true);
      } finally {
        // The loading flag is cleared on both the success and the failure path
        setIsAuthLoading(false);
      }
    })();
  }, []);

  /**
   * Generates the Markdown content for a single wiki page and stores it in state.
   *
   * Builds the page prompt, sends it to the chat backend over a WebSocket
   * (falling back to a streaming HTTP POST to /api/chat/stream when the socket
   * fails), strips a wrapping markdown code fence and saves the result in
   * `generatedPages` and `originalMarkdown`.
   *
   * The returned promise never rejects: failures are written into the page's
   * content and reported through `setError`, so a caller's queue keeps going.
   *
   * @param page  Page descriptor; `id`, `title` and `filePaths` are read here.
   * @param owner Repository owner; must be non-empty.
   * @param repo  Repository name; must be non-empty.
   */
  const generatePageContent = useCallback(async (page: WikiPage, owner: string, repo: string): Promise<void> => {
    // Drop the page from the in-progress set and clear the page-level loading message
    const finishPage = () => {
      setPagesInProgress(prev => {
        const next = new Set(prev);
        next.delete(page.id);
        return next;
      });
      setLoadingMessage(undefined); // Clear specific loading message
    };

    // Skip if this page is already being processed.
    // This check runs BEFORE the try/finally below so that a skipped duplicate call
    // never clears the flag or the in-progress marker owned by the in-flight call.
    if (activeContentRequests.get(page.id)) {
      console.log(`Page ${page.id} (${page.title}) is already being processed, skipping duplicate call`);
      return;
    }

    // Skip if content already exists; just make sure the page is not shown as pending
    if (generatedPages[page.id]?.content) {
      finishPage();
      return;
    }

    // Mark this page as being processed immediately to prevent race conditions
    // This ensures that if multiple calls happen nearly simultaneously, only one proceeds
    activeContentRequests.set(page.id, true);

    try {
      // Validate repo info
      if (!owner || !repo) {
        throw new Error('Invalid repository information. Owner and repo name are required.');
      }

      // Mark page as in progress
      setPagesInProgress(prev => new Set(prev).add(page.id));
      // Don't set loading message for individual pages during queue processing

      const filePaths = page.filePaths;

      // Show a placeholder while the real content is being generated
      setGeneratedPages(prev => ({
        ...prev,
        [page.id]: { ...page, content: 'Loading...' } // Placeholder
      }));
      setOriginalMarkdown(prev => ({ ...prev, [page.id]: '' })); // Clear previous original

      // Make API call to generate page content
      console.log(`Starting content generation for page: ${page.title}`);

      // Get repository URL
      const repoUrl = getRepoUrl(effectiveRepoInfo);

      // Create the prompt content - simplified to avoid message dialogs
      const promptContent =
`You are an expert technical writer and software architect.
Your task is to generate a comprehensive and accurate technical wiki page in Markdown format about a specific feature, system, or module within a given software project.

You will be given:
1. The "[WIKI_PAGE_TOPIC]" for the page you need to create.
2. A list of "[RELEVANT_SOURCE_FILES]" from the project that you MUST use as the sole basis for the content. You have access to the full content of these files. You MUST use AT LEAST 5 relevant source files for comprehensive coverage - if fewer are provided, search for additional related files in the codebase.

CRITICAL STARTING INSTRUCTION:
The very first thing on the page MUST be a \`<details>\` block listing ALL the \`[RELEVANT_SOURCE_FILES]\` you used to generate the content. There MUST be AT LEAST 5 source files listed - if fewer were provided, you MUST find additional related files to include.
Format it exactly like this:
<details>
<summary>Relevant source files</summary>

Remember, do not provide any acknowledgements, disclaimers, apologies, or any other preface before the \`<details>\` block. JUST START with the \`<details>\` block.
The following files were used as context for generating this wiki page:

${filePaths.map(path => `- [${path}](${generateFileUrl(path)})`).join('\n')}
<!-- Add additional relevant files if fewer than 5 were provided -->
</details>

Immediately after the \`<details>\` block, the main title of the page should be a H1 Markdown heading: \`# ${page.title}\`.

Based ONLY on the content of the \`[RELEVANT_SOURCE_FILES]\`:

1.  **Introduction:** Start with a concise introduction (1-2 paragraphs) explaining the purpose, scope, and high-level overview of "${page.title}" within the context of the overall project. If relevant, and if information is available in the provided files, link to other potential wiki pages using the format \`[Link Text](#page-anchor-or-id)\`.

2.  **Detailed Sections:** Break down "${page.title}" into logical sections using H2 (\`##\`) and H3 (\`###\`) Markdown headings. For each section:
    *   Explain the architecture, components, data flow, or logic relevant to the section's focus, as evidenced in the source files.
    *   Identify key functions, classes, data structures, API endpoints, or configuration elements pertinent to that section.

3.  **Mermaid Diagrams:**
    *   EXTENSIVELY use Mermaid diagrams (e.g., \`flowchart TD\`, \`sequenceDiagram\`, \`classDiagram\`, \`erDiagram\`, \`graph TD\`) to visually represent architectures, flows, relationships, and schemas found in the source files.
    *   Ensure diagrams are accurate and directly derived from information in the \`[RELEVANT_SOURCE_FILES]\`.
    *   Provide a brief explanation before or after each diagram to give context.
    *   CRITICAL: All diagrams MUST follow strict vertical orientation:
       - Use "graph TD" (top-down) directive for flow diagrams
       - NEVER use "graph LR" (left-right)
       - Maximum node width should be 3-4 words
       - For sequence diagrams:
         - Start with "sequenceDiagram" directive on its own line
         - Define ALL participants at the beginning using "participant" keyword
         - Optionally specify participant types: actor, boundary, control, entity, database, collections, queue
         - Use descriptive but concise participant names, or use aliases: "participant A as Alice"
         - Use the correct Mermaid arrow syntax (8 types available):
           - -> solid line without arrow (rarely used)
           - --> dotted line without arrow (rarely used)
           - ->> solid line with arrowhead (most common for requests/calls)
           - -->> dotted line with arrowhead (most common for responses/returns)
           - ->x solid line with X at end (failed/error message)
           - -->x dotted line with X at end (failed/error response)
           - -) solid line with open arrow (async message, fire-and-forget)
           - --) dotted line with open arrow (async response)
           - Examples: A->>B: Request, B-->>A: Response, A->xB: Error, A-)B: Async event
         - Use +/- suffix for activation boxes: A->>+B: Start (activates B), B-->>-A: End (deactivates B)
         - Group related participants using "box": box GroupName ... end
         - Use structural elements for complex flows:
           - loop LoopText ... end (for iterations)
           - alt ConditionText ... else ... end (for conditionals)
           - opt OptionalText ... end (for optional flows)
           - par ParallelText ... and ... end (for parallel actions)
           - critical CriticalText ... option ... end (for critical regions)
           - break BreakText ... end (for breaking flows/exceptions)
         - Add notes for clarification: "Note over A,B: Description", "Note right of A: Detail"
         - Use autonumber directive to add sequence numbers to messages
         - NEVER use flowchart-style labels like A--|label|-->B. Always use a colon for labels: A->>B: My Label

4.  **Tables:**
    *   Use Markdown tables to summarize information such as:
        *   Key features or components and their descriptions.
        *   API endpoint parameters, types, and descriptions.
        *   Configuration options, their types, and default values.
        *   Data model fields, types, constraints, and descriptions.

5.  **Code Snippets (ENTIRELY OPTIONAL):**
    *   Include short, relevant code snippets (e.g., Python, Java, JavaScript, SQL, JSON, YAML) directly from the \`[RELEVANT_SOURCE_FILES]\` to illustrate key implementation details, data structures, or configurations.
    *   Ensure snippets are well-formatted within Markdown code blocks with appropriate language identifiers.

6.  **Source Citations (EXTREMELY IMPORTANT):**
    *   For EVERY piece of significant information, explanation, diagram, table entry, or code snippet, you MUST cite the specific source file(s) and relevant line numbers from which the information was derived.
    *   Place citations at the end of the paragraph, under the diagram/table, or after the code snippet.
    *   Use the exact format: \`Sources: [filename.ext:start_line-end_line]()\` for a range, or \`Sources: [filename.ext:line_number]()\` for a single line. Multiple files can be cited: \`Sources: [file1.ext:1-10](), [file2.ext:5](), [dir/file3.ext]()\` (if the whole file is relevant and line numbers are not applicable or too broad).
    *   If an entire section is overwhelmingly based on one or two files, you can cite them under the section heading in addition to more specific citations within the section.
    *   IMPORTANT: You MUST cite AT LEAST 5 different source files throughout the wiki page to ensure comprehensive coverage.

7.  **Technical Accuracy:** All information must be derived SOLELY from the \`[RELEVANT_SOURCE_FILES]\`. Do not infer, invent, or use external knowledge about similar systems or common practices unless it's directly supported by the provided code. If information is not present in the provided files, do not include it or explicitly state its absence if crucial to the topic.

8.  **Clarity and Conciseness:** Use clear, professional, and concise technical language suitable for other developers working on or learning about the project. Avoid unnecessary jargon, but use correct technical terms where appropriate.

9.  **Conclusion/Summary:** End with a brief summary paragraph if appropriate for "${page.title}", reiterating the key aspects covered and their significance within the project.

IMPORTANT: Generate the content in ${language === 'en' ? 'English' :
            language === 'ja' ? 'Japanese (日本語)' :
            language === 'zh' ? 'Mandarin Chinese (中文)' :
            language === 'zh-tw' ? 'Traditional Chinese (繁體中文)' :
            language === 'es' ? 'Spanish (Español)' :
            language === 'kr' ? 'Korean (한국어)' :
            language === 'vi' ? 'Vietnamese (Tiếng Việt)' :
            language === "pt-br" ? "Brazilian Portuguese (Português Brasileiro)" :
            language === "fr" ? "Français (French)" :
            language === "ru" ? "Русский (Russian)" :
            'English'} language.

Remember:
- Ground every claim in the provided source files.
- Prioritize accuracy and direct representation of the code's functionality and structure.
- Structure the document logically for easy understanding by other developers.
`;

      // Prepare request body
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const requestBody: Record<string, any> = {
        repo_url: repoUrl,
        type: effectiveRepoInfo.type,
        messages: [{
          role: 'user',
          content: promptContent
        }]
      };

      // Add tokens if available
      addTokensToRequestBody(requestBody, currentToken, effectiveRepoInfo.type, selectedProviderState, selectedModelState, isCustomSelectedModelState, customSelectedModelState, language, modelExcludedDirs, modelExcludedFiles, modelIncludedDirs, modelIncludedFiles);

      // Use WebSocket for communication
      let content = '';

      try {
        // Create WebSocket URL from the server base URL.
        // Replacing the leading "http" covers both schemes: http -> ws, https -> wss.
        const serverBaseUrl = process.env.SERVER_BASE_URL || 'http://localhost:8001';
        const wsBaseUrl = serverBaseUrl.replace(/^http/, 'ws');
        const wsUrl = `${wsBaseUrl}/ws/chat`;

        // Create a new WebSocket connection
        const ws = new WebSocket(wsUrl);

        try {
          // Wait for the connection to open, then send the request
          await new Promise<void>((resolve, reject) => {
            // If the connection doesn't open within 5 seconds, fall back to HTTP
            const timeout = setTimeout(() => {
              reject(new Error('WebSocket connection timeout'));
            }, 5000);

            ws.onopen = () => {
              clearTimeout(timeout);
              console.log(`WebSocket connection established for page: ${page.title}`);
              // Send the request as JSON
              ws.send(JSON.stringify(requestBody));
              resolve();
            };

            ws.onerror = (error) => {
              clearTimeout(timeout);
              console.error('WebSocket error:', error);
              reject(new Error('WebSocket connection failed'));
            };
          });

          // Accumulate streamed chunks until the server closes the connection
          await new Promise<void>((resolve, reject) => {
            // Handle incoming messages
            ws.onmessage = (event) => {
              content += event.data;
            };

            // Handle WebSocket close
            ws.onclose = () => {
              console.log(`WebSocket connection closed for page: ${page.title}`);
              resolve();
            };

            // Handle WebSocket errors
            ws.onerror = (error) => {
              console.error('WebSocket error during message reception:', error);
              reject(new Error('WebSocket error during message reception'));
            };
          });
        } catch (socketError) {
          // Detach handlers and close the socket before falling back, so a late
          // "open" cannot send a second copy of the request alongside the HTTP call
          ws.onopen = null;
          ws.onmessage = null;
          ws.onclose = null;
          ws.onerror = null;
          ws.close();
          throw socketError;
        }
      } catch (wsError) {
        console.error('WebSocket error, falling back to HTTP:', wsError);

        // Fall back to HTTP if WebSocket fails
        const response = await fetch(`/api/chat/stream`, {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
          },
          body: JSON.stringify(requestBody)
        });

        if (!response.ok) {
          const errorText = await response.text().catch(() => 'No error details available');
          console.error(`API error (${response.status}): ${errorText}`);
          throw new Error(`Error generating page content: ${response.status} - ${response.statusText}`);
        }

        // Process the response; discard anything partially received over the socket
        content = '';
        const reader = response.body?.getReader();
        const decoder = new TextDecoder();

        if (!reader) {
          throw new Error('Failed to get response reader');
        }

        try {
          while (true) {
            const { done, value } = await reader.read();
            if (done) break;
            content += decoder.decode(value, { stream: true });
          }
          // Ensure final decoding
          content += decoder.decode();
        } catch (readError) {
          console.error('Error reading stream:', readError);
          throw new Error('Error processing response stream');
        }
      }

      // Clean up markdown delimiters
      content = content.replace(/^```markdown\s*/i, '').replace(/```\s*$/i, '');

      console.log(`Received content for ${page.title}, length: ${content.length} characters`);

      // Store the FINAL generated content
      const updatedPage = { ...page, content };
      setGeneratedPages(prev => ({ ...prev, [page.id]: updatedPage }));
      // Store this as the original for potential mermaid retries
      setOriginalMarkdown(prev => ({ ...prev, [page.id]: content }));
    } catch (err) {
      // Errors are surfaced in state rather than rethrown, so the caller's queue is not blocked
      console.error(`Error generating content for page ${page.id}:`, err);
      const errorMessage = err instanceof Error ? err.message : 'Unknown error';
      // Update page state to show error
      setGeneratedPages(prev => ({
        ...prev,
        [page.id]: { ...page, content: `Error generating content: ${errorMessage}` }
      }));
      setError(`Failed to generate content for ${page.title}.`);
    } finally {
      // Clear the processing flag for this page
      // This must happen in the finally block to ensure the flag is cleared
      // even if an error occurs during processing
      activeContentRequests.delete(page.id);

      // Mark page as done
      finishPage();
    }
  }, [generatedPages, currentToken, effectiveRepoInfo, selectedProviderState, selectedModelState, isCustomSelectedModelState, customSelectedModelState, modelExcludedDirs, modelExcludedFiles, modelIncludedDirs, modelIncludedFiles, language, activeContentRequests, generateFileUrl]);

  // Determine the wiki structure from repository data
  const determineWikiStructure = useCallback(async (fileTree: string, readme: string, owner: string, repo: string) => {
    if (!owner || !repo) {
      setError('Invalid repository information. Owner and repo name are required.');
      setIsLoading(false);
      setEmbeddingError(false); // Reset embedding error state
      return;
    }

    // Skip if structure request is already in progress
    if (structureRequestInProgress) {
      console.log('Wiki structure determination already in progress, skipping duplicate call');
      return;
    }

    try {
      setStructureRequestInProgress(true);
      setLoadingMessage(messages.loading?.determiningStructure || 'Determining wiki structure...');

      // Get repository URL
      const repoUrl = getRepoUrl(effectiveRepoInfo);

      // Prepare request body
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      const requestBody: Record<string, any> = {
        repo_url: repoUrl,
        type: effectiveRepoInfo.type,
        messages: [{
          role: 'user',
content: `Analyze this GitHub repository ${owner}/${repo} and create a wiki structure for it.

1. The complete file tree of the project:
<file_tree>
${fileTree}
</file_tree>

2. The README file of the project:
<readme>
${readme}
</readme>

I want to create a wiki for this repository. Determine the most logical structure for a wiki based on the repository's content.

IMPORTANT: The wiki content will be generated in ${language === 'en' ? 'English' :
            language === 'ja' ? 'Japanese (日本語)' :
            language === 'zh' ? 'Mandarin Chinese (中文)' :
            language === 'zh-tw' ? 'Traditional Chinese (繁體中文)' :
            language === 'es' ? 'Spanish (Español)' :
            language === 'kr' ? 'Korean (한国語)' :
            language === 'vi' ? 'Vietnamese (Tiếng Việt)' :
            language === "pt-br" ? "Brazilian Portuguese (Português Brasileiro)" :
            language === "fr" ? "Français (French)" :
            language === "ru" ? "Русский (Russian)" :
            'English'} language.

When designing the wiki structure, include pages that would benefit from visual diagrams, such as:
- Architecture overviews
- Data flow descriptions
- Component relationships
- Process workflows
- State machines
- Class hierarchies

${isComprehensiveView ? `
Create a structured wiki with the following main sections:
- Overview (general information about the project)
- System Architecture (how the system is designed)
- Core Features (key functionality)
- Data Management/Flow: If applicable, how data is stored, processed, accessed, and managed (e.g., database schema, data pipelines, state management).
- Frontend Components (UI elements, if applicable.)
- Backend Systems (server-side components)
- Model Integration (AI model connections)
- Deployment/Infrastructure (how to deploy, what's the infrastructure like)
- Extensibility and Customization: If the project architecture supports it, explain how to extend or customize its functionality (e.g., plugins, theming, custom modules, hooks).

Each section should contain relevant pages. For example, the "Frontend Components" section might include pages for "Home Page", "Repository Wiki Page", "Ask Component", etc.

Return your analysis in the following XML format:

<wiki_structure>
  <title>[Overall title for the wiki]</title>
  <description>[Brief description of the repository]</description>
  <sections>
    <section id="section-1">
      <title>[Section title]</title>
      <pages>
        <page_ref>page-1</page_ref>
        <page_ref>page-2</page_ref>
      </pages>
      <subsections>
        <section_ref>section-2</section_ref>
      </subsections>
    </section>
    <!-- More sections as needed -->
  </sections>
  <pages>
    <page id="page-1">
      <title>[Page title]</title>
      <description>[Brief description of what this page will cover]</description>
      <importance>high|medium|low</importance>
      <relevant_files>
        <file_path>[Path to a relevant file]</file_path>
        <!-- More file paths as needed -->
      </relevant_files>
      <related_pages>
        <related>page-2</related>
        <!-- More related page IDs as needed -->
      </related_pages>
      <parent_section>section-1</parent_section>
    </page>
    <!-- More pages as needed -->
  </pages>
</wiki_structure>
` : `
Return your analysis in the following XML format:

<wiki_structure>
  <title>[Overall title for the wiki]</title>
  <description>[Brief description of the repository]</description>
  <pages>
    <page id="page-1">
      <title>[Page title]</title>
      <description>[Brief description of what this page will cover]</description>
      <importance>high|medium|low</importance>
      <relevant_files>
        <file_path>[Path to a relevant file]</file_path>
        <!-- More file paths as needed -->
      </relevant_files>
      <related_pages>
        <related>page-2</related>
        <!-- More related page IDs as needed -->
      </related_pages>
    </page>
    <!-- More pages as needed -->
  </pages>
</wiki_structure>
`}

IMPORTANT FORMATTING INSTRUCTIONS:
- Return ONLY the valid XML structure specified above
- DO NOT wrap the XML in markdown code blocks (no \`\`\` or \`\`\`xml)
- DO NOT include any explanation text before or after the XML
- Ensure the XML is properly formatted and valid
- Start directly with <wiki_structure> and end with </wiki_structure>

IMPORTANT:
1. Create ${isComprehensiveView ? '8-12' : '4-6'} pages that would make a ${isComprehensiveView ? 'comprehensive' : 'concise'} wiki for this repository
2. Each page should focus on a specific aspect of the codebase (e.g., architecture, key features, setup)
3. The relevant_files should be actual files from the repository that would be used to generate that page
4. Return ONLY valid XML with the structure specified above, with no markdown code block delimiters`
        }]
      };

      // Add tokens if available
      addTokensToRequestBody(requestBody, currentToken, effectiveRepoInfo.type, selectedProviderState, selectedModelState, isCustomSelectedModelState, customSelectedModelState, language, modelExcludedDirs, modelExcludedFiles, modelIncludedDirs, modelIncludedFiles);

      // Use WebSocket for communication
      let responseText = '';

      try {
        // Create WebSocket URL from the server base URL
        const serverBaseUrl = process.env.SERVER_BASE_URL || 'http://localhost:8001';
        const wsBaseUrl = serverBaseUrl.replace(/^http/, 'ws')? serverBaseUrl.replace(/^https/, 'wss'): serverBaseUrl.replace(/^http/, 'ws');
        const wsUrl = `${wsBaseUrl}/ws/chat`;

        // Create a new WebSocket connection
        const ws = new WebSocket(wsUrl);

        // Create a promise that resolves when the WebSocket connection is complete
        await new Promise<void>((resolve, reject) => {
          // Set up event handlers
          ws.onopen = () => {
            console.log('WebSocket connection established for wiki structure');
            // Send the request as JSON
            ws.send(JSON.stringify(requestBody));
            resolve();
          };

          ws.onerror = (error) => {
            console.error('WebSocket error:', error);
            reject(new Error('WebSocket connection failed'));
          };

          // If the connection doesn't open within 5 seconds, fall back to HTTP
          const timeout = setTimeout(() => {
            reject(new Error('WebSocket connection timeout'));
          }, 5000);

          // Clear the timeout if the connection opens successfully
          ws.onopen = () => {
            clearTimeout(timeout);
            console.log('WebSocket connection established for wiki structure');
            // Send the request as JSON
            ws.send(JSON.stringify(requestBody));
            resolve();
          };
        });

        // Create a promise that resolves when the WebSocket response is complete
        await new Promise<void>((resolve, reject) => {
          // Handle incoming messages
          ws.onmessage = (event) => {
            responseText += event.data;
          };

          // Handle WebSocket close
          ws.onclose = () => {
            console.log('WebSocket connection closed for wiki structure');
            resolve();
          };

          // Handle WebSocket errors
          ws.onerror = (error) => {
            console.error('WebSocket error during message reception:', error);
            reject(new Error('WebSocket error during message reception'));
          };
        });
      } catch (wsError) {
        console.error('WebSocket error, falling back to HTTP:', wsError);

        // Fall back to HTTP if WebSocket fails
        const response = await fetch(`/api/chat/stream`, {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
          },
          body: JSON.stringify(requestBody)
        });

        if (!response.ok) {
          throw new Error(`Error determining wiki structure: ${response.status}`);
        }

        // Process the response
        responseText = '';
        const reader = response.body?.getReader();
        const decoder = new TextDecoder();

        if (!reader) {
          throw new Error('Failed to get response reader');
        }

        while (true) {
          const { done, value } = await reader.read();
          if (done) break;
          responseText += decoder.decode(value, { stream: true });
        }
      }

      if(responseText.includes('Error preparing retriever: Environment variable OPENAI_API_KEY must be set')) {
         setEmbeddingError(true);
         throw new Error('OPENAI_API_KEY environment variable is not set. Please configure your OpenAI API key.');
       }

       if(responseText.includes('Ollama model') && responseText.includes('not found')) {
         setEmbeddingError(true);
         throw new Error('The specified Ollama embedding model was not found. Please ensure the model is installed locally or select a different embedding model in the configuration.');
       }

        // Clean up markdown delimiters
      responseText = responseText.replace(/^```(?:xml)?\s*/i, '').replace(/```\s*$/i, '');

      // Extract wiki structure from response
      const xmlMatch = responseText.match(/<wiki_structure>[\s\S]*?<\/wiki_structure>/m);
      if (!xmlMatch) {
        throw new Error('No valid XML found in response');
      }

      let xmlText = xmlMatch[0];
      xmlText = xmlText.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, '');
      // Try parsing with DOMParser
      const parser = new DOMParser();
      const xmlDoc = parser.parseFromString(xmlText, "text/xml");

      // Check for parsing errors
      const parseError = xmlDoc.querySelector('parsererror');
      if (parseError) {
        // Log the first few elements to see what was parsed
        const elements = xmlDoc.querySelectorAll('*');
        if (elements.length > 0) {
          console.log('First 5 element names:',
            Array.from(elements).slice(0, 5).map(el => el.nodeName).join(', '));
        }

        // We'll continue anyway since the XML might still be usable
      }

      // Extract wiki structure
      let title = '';
      let description = '';
      let pages: WikiPage[] = [];

      // Try using DOM parsing first
      const titleEl = xmlDoc.querySelector('title');
      const descriptionEl = xmlDoc.querySelector('description');
      const pagesEls = xmlDoc.querySelectorAll('page');

      title = titleEl ? titleEl.textContent || '' : '';
      description = descriptionEl ? descriptionEl.textContent || '' : '';

      // Parse pages using DOM
      pages = [];

      if (parseError && (!pagesEls || pagesEls.length === 0)) {
        console.warn('DOM parsing failed, trying regex fallback');
      }

      pagesEls.forEach(pageEl => {
        const id = pageEl.getAttribute('id') || `page-${pages.length + 1}`;
        const titleEl = pageEl.querySelector('title');
        const importanceEl = pageEl.querySelector('importance');
        const filePathEls = pageEl.querySelectorAll('file_path');
        const relatedEls = pageEl.querySelectorAll('related');

        const title = titleEl ? titleEl.textContent || '' : '';
        const importance = importanceEl ?
          (importanceEl.textContent === 'high' ? 'high' :
            importanceEl.textContent === 'medium' ? 'medium' : 'low') : 'medium';

        const filePaths: string[] = [];
        filePathEls.forEach(el => {
          if (el.textContent) filePaths.push(el.textContent);
        });

        const relatedPages: string[] = [];
        relatedEls.forEach(el => {
          if (el.textContent) relatedPages.push(el.textContent);
        });

        pages.push({
          id,
          title,
          content: '', // Will be generated later
          filePaths,
          importance,
          relatedPages
        });
      });

      // Extract sections if they exist in the XML
      const sections: WikiSection[] = [];
      const rootSections: string[] = [];

      // Try to parse sections if we're in comprehensive view
      if (isComprehensiveView) {
        const sectionsEls = xmlDoc.querySelectorAll('section');

        if (sectionsEls && sectionsEls.length > 0) {
          // Process sections
          sectionsEls.forEach(sectionEl => {
            const id = sectionEl.getAttribute('id') || `section-${sections.length + 1}`;
            const titleEl = sectionEl.querySelector('title');
            const pageRefEls = sectionEl.querySelectorAll('page_ref');
            const sectionRefEls = sectionEl.querySelectorAll('section_ref');

            const title = titleEl ? titleEl.textContent || '' : '';
            const pages: string[] = [];
            const subsections: string[] = [];

            pageRefEls.forEach(el => {
              if (el.textContent) pages.push(el.textContent);
            });

            sectionRefEls.forEach(el => {
              if (el.textContent) subsections.push(el.textContent);
            });

            sections.push({
              id,
              title,
              pages,
              subsections: subsections.length > 0 ? subsections : undefined
            });

            // Check if this is a root section (not referenced by any other section)
            let isReferenced = false;
            sectionsEls.forEach(otherSection => {
              const otherSectionRefs = otherSection.querySelectorAll('section_ref');
              otherSectionRefs.forEach(ref => {
                if (ref.textContent === id) {
                  isReferenced = true;
                }
              });
            });

            if (!isReferenced) {
              rootSections.push(id);
            }
          });
        }
      }

      // Create wiki structure
      const wikiStructure: WikiStructure = {
        id: 'wiki',
        title,
        description,
        pages,
        sections,
        rootSections
      };

      setWikiStructure(wikiStructure);
      setCurrentPageId(pages.length > 0 ? pages[0].id : undefined);

      // Start generating content for all pages with controlled concurrency
      if (pages.length > 0) {
        // Mark all pages as in progress
        const initialInProgress = new Set(pages.map(p => p.id));
        setPagesInProgress(initialInProgress);

        console.log(`Starting generation for ${pages.length} pages with controlled concurrency`);

        // Maximum concurrent requests
        const MAX_CONCURRENT = 1;

        // Create a queue of pages
        const queue = [...pages];
        let activeRequests = 0;

        // Function to process next items in queue
        const processQueue = () => {
          // Process as many items as we can up to our concurrency limit
          while (queue.length > 0 && activeRequests < MAX_CONCURRENT) {
            const page = queue.shift();
            if (page) {
              activeRequests++;
              console.log(`Starting page ${page.title} (${activeRequests} active, ${queue.length} remaining)`);

              // Start generating content for this page
              generatePageContent(page, owner, repo)
                .finally(() => {
                  // When done (success or error), decrement active count and process more
                  activeRequests--;
                  console.log(`Finished page ${page.title} (${activeRequests} active, ${queue.length} remaining)`);

                  // Check if all work is done (queue empty and no active requests)
                  if (queue.length === 0 && activeRequests === 0) {
                    console.log("All page generation tasks completed.");
                    setIsLoading(false);
                    setLoadingMessage(undefined);
                  } else {
                    // Only process more if there are items remaining and we're under capacity
                    if (queue.length > 0 && activeRequests < MAX_CONCURRENT) {
                      processQueue();
                    }
                  }
                });
            }
          }

          // Additional check: If the queue started empty or becomes empty and no requests were started/active
          if (queue.length === 0 && activeRequests === 0 && pages.length > 0 && pagesInProgress.size === 0) {
            // This handles the case where the queue might finish before the finally blocks fully update activeRequests
            // or if the initial queue was processed very quickly
            console.log("Queue empty and no active requests after loop, ensuring loading is false.");
            setIsLoading(false);
            setLoadingMessage(undefined);
          } else if (pages.length === 0) {
            // Handle case where there were no pages to begin with
            setIsLoading(false);
            setLoadingMessage(undefined);
          }
        };

        // Start processing the queue
        processQueue();
      } else {
        // Set loading to false if there were no pages found
        setIsLoading(false);
        setLoadingMessage(undefined);
      }

    } catch (error) {
      console.error('Error determining wiki structure:', error);
      setIsLoading(false);
      setError(error instanceof Error ? error.message : 'An unknown error occurred');
      setLoadingMessage(undefined);
    } finally {
      setStructureRequestInProgress(false);
    }
  }, [generatePageContent, currentToken, effectiveRepoInfo, pagesInProgress.size, structureRequestInProgress, selectedProviderState, selectedModelState, isCustomSelectedModelState, customSelectedModelState, modelExcludedDirs, modelExcludedFiles, language, messages.loading, isComprehensiveView]);

  // Fetch the repository's file list and README, then pass both to
  // determineWikiStructure.
  //
  // The source is selected by effectiveRepoInfo.type:
  //   - 'local'     -> the /local_repo/structure endpoint
  //   - 'github'    -> GitHub REST API (github.com or a GitHub Enterprise host)
  //   - 'gitlab'    -> GitLab REST API v4 (paginated tree listing)
  //   - 'bitbucket' -> Bitbucket REST API 2.0
  //
  // A failure to obtain the file tree is reported through setError. A README
  // that cannot be fetched from GitHub, GitLab or Bitbucket is only logged and
  // the README is treated as empty.
  const fetchRepositoryStructure = useCallback(async () => {
    // If a request is already in progress, don't start another one
    if (requestInProgress) {
      console.log('Repository fetch already in progress, skipping duplicate call');
      return;
    }

    // Reset previous state
    setWikiStructure(undefined);
    setCurrentPageId(undefined);
    setGeneratedPages({});
    setPagesInProgress(new Set());
    setError(null);
    setEmbeddingError(false); // Reset embedding error state

    try {
      // Set the request in progress flag
      setRequestInProgress(true);

      // Update loading state
      setIsLoading(true);
      setLoadingMessage(messages.loading?.fetchingStructure || 'Fetching repository structure...');

      let fileTreeData = '';
      let readmeContent = '';

      if (effectiveRepoInfo.type === 'local' && effectiveRepoInfo.localPath) {
        const response = await fetch(`/local_repo/structure?path=${encodeURIComponent(effectiveRepoInfo.localPath)}`);

        if (!response.ok) {
          const errorData = await response.text();
          throw new Error(`Local repository API error (${response.status}): ${errorData}`);
        }

        const data = await response.json();
        fileTreeData = data.file_tree;
        readmeContent = data.readme;
        // For local repos, we can't determine the actual branch, so use 'main' as default
        setDefaultBranch('main');
      } else if (effectiveRepoInfo.type === 'github') {
        // GitHub API approach
        // Try to get the tree data for common branch names
        let treeData = null;
        let apiErrorDetails = '';

        // Determine the GitHub API base URL based on the repository URL
        const getGithubApiUrl = (repoUrl: string | null): string => {
          if (!repoUrl) {
            return 'https://api.github.com'; // Default to public GitHub
          }

          try {
            const url = new URL(repoUrl);
            const hostname = url.hostname;

            // If it's the public GitHub, use the standard API URL
            if (hostname === 'github.com') {
              return 'https://api.github.com';
            }

            // For GitHub Enterprise, use the enterprise API URL format
            // GitHub Enterprise API URL format: https://github.company.com/api/v3
            return `${url.protocol}//${hostname}/api/v3`;
          } catch {
            return 'https://api.github.com'; // Fallback to public GitHub if URL parsing fails
          }
        };

        const githubApiBaseUrl = getGithubApiUrl(effectiveRepoInfo.repoUrl);

        // First, try to get the default branch from the repository info
        let defaultBranchLocal = null;
        try {
          const repoInfoResponse = await fetch(`${githubApiBaseUrl}/repos/${owner}/${repo}`, {
            headers: createGithubHeaders(currentToken)
          });

          if (repoInfoResponse.ok) {
            const repoData = await repoInfoResponse.json();
            defaultBranchLocal = repoData.default_branch;
            console.log(`Found default branch: ${defaultBranchLocal}`);
            // Store the default branch in state
            setDefaultBranch(defaultBranchLocal || 'main');
          }
        } catch (err) {
          console.warn('Could not fetch repository info for default branch:', err);
        }

        // Create list of branches to try, prioritizing the actual default branch
        // (the filter removes duplicates when the default is already 'main' or 'master')
        const branchesToTry = defaultBranchLocal
          ? [defaultBranchLocal, 'main', 'master'].filter((branch, index, arr) => arr.indexOf(branch) === index)
          : ['main', 'master'];

        for (const branch of branchesToTry) {
          const apiUrl = `${githubApiBaseUrl}/repos/${owner}/${repo}/git/trees/${branch}?recursive=1`;
          const headers = createGithubHeaders(currentToken);

          console.log(`Fetching repository structure from branch: ${branch}`);
          try {
            const response = await fetch(apiUrl, {
              headers
            });

            if (response.ok) {
              treeData = await response.json();
              console.log('Successfully fetched repository structure');
              break;
            } else {
              const errorData = await response.text();
              apiErrorDetails = `Status: ${response.status}, Response: ${errorData}`;
              console.error(`Error fetching repository structure: ${apiErrorDetails}`);
            }
          } catch (err) {
            // A network failure on one branch is logged; the next candidate is still tried
            console.error(`Network error fetching branch ${branch}:`, err);
          }
        }

        if (!treeData || !treeData.tree) {
          if (apiErrorDetails) {
            throw new Error(`Could not fetch repository structure. API Error: ${apiErrorDetails}`);
          } else {
            throw new Error('Could not fetch repository structure. Repository might not exist, be empty or private.');
          }
        }

        // Convert tree data to a string representation (one file path per line)
        fileTreeData = treeData.tree
          .filter((item: { type: string; path: string }) => item.type === 'blob')
          .map((item: { type: string; path: string }) => item.path)
          .join('\n');

        // Try to fetch README.md content
        try {
          const headers = createGithubHeaders(currentToken);

          const readmeResponse = await fetch(`${githubApiBaseUrl}/repos/${owner}/${repo}/readme`, {
            headers
          });

          if (readmeResponse.ok) {
            const readmeData = await readmeResponse.json();
            // The content is decoded from base64 here. atob() on its own returns
            // one character per byte, which garbles multi-byte UTF-8 text
            // (any non-ASCII README), so the bytes are decoded explicitly.
            const binaryReadme = atob(readmeData.content);
            const readmeBytes = Uint8Array.from(binaryReadme, (ch) => ch.charCodeAt(0));
            readmeContent = new TextDecoder().decode(readmeBytes);
          } else {
            console.warn(`Could not fetch README.md, status: ${readmeResponse.status}`);
          }
        } catch (err) {
          console.warn('Could not fetch README.md, continuing with empty README', err);
        }
      }
      else if (effectiveRepoInfo.type === 'gitlab') {
        // GitLab API approach
        const projectPath = extractUrlPath(effectiveRepoInfo.repoUrl ?? '')?.replace(/\.git$/, '') || `${owner}/${repo}`;
        const projectDomain = extractUrlDomain(effectiveRepoInfo.repoUrl ?? "https://gitlab.com");
        const encodedProjectPath = encodeURIComponent(projectPath);

        const headers = createGitlabHeaders(currentToken);

        /* eslint-disable-next-line @typescript-eslint/no-explicit-any */
        const filesData: any[] = [];

        try {
          // Step 1: Get project info to determine default branch
          let projectInfoUrl: string;
          try {
            const validatedUrl = new URL(projectDomain ?? ''); // Validate domain
            projectInfoUrl = `${validatedUrl.origin}/api/v4/projects/${encodedProjectPath}`;
          } catch {
            throw new Error(`Invalid project domain URL: ${projectDomain}`);
          }
          const projectInfoRes = await fetch(projectInfoUrl, { headers });

          if (!projectInfoRes.ok) {
            const errorData = await projectInfoRes.text();
            throw new Error(`GitLab project info error: Status ${projectInfoRes.status}, Response: ${errorData}`);
          }

          const projectInfo = await projectInfoRes.json();
          const defaultBranchLocal = projectInfo.default_branch || 'main'; // 'main' is the fallback
          console.log(`Found GitLab default branch: ${defaultBranchLocal}`);
          // Store the default branch in state
          setDefaultBranch(defaultBranchLocal);

          // Step 2: Paginate to fetch full file tree
          let page = 1;
          let morePages = true;

          while (morePages) {
            const apiUrl = `${projectInfoUrl}/repository/tree?recursive=true&per_page=100&page=${page}`;
            const response = await fetch(apiUrl, { headers });

            if (!response.ok) {
              const errorData = await response.text();
              throw new Error(`Error fetching GitLab repository structure (page ${page}): ${errorData}`);
            }

            const pageData = await response.json();
            filesData.push(...pageData);

            // The loop continues only while the response carries a non-empty x-next-page header
            const nextPage = response.headers.get('x-next-page');
            morePages = !!nextPage;
            page = nextPage ? parseInt(nextPage, 10) : page + 1;
          }

          if (filesData.length === 0) {
            throw new Error('Could not fetch repository structure. Repository might be empty or inaccessible.');
          }

          // Step 3: Format file paths (one file path per line)
          fileTreeData = filesData
            .filter((item: { type: string; path: string }) => item.type === 'blob')
            .map((item: { type: string; path: string }) => item.path)
            .join('\n');

          // Step 4: Try to fetch README.md content
          const readmeUrl = `${projectInfoUrl}/repository/files/README.md/raw`;
          try {
            const readmeResponse = await fetch(readmeUrl, { headers });
            if (readmeResponse.ok) {
              readmeContent = await readmeResponse.text();
              console.log('Successfully fetched GitLab README.md');
            } else {
              console.warn(`Could not fetch GitLab README.md status: ${readmeResponse.status}`);
            }
          } catch (err) {
            console.warn(`Error fetching GitLab README.md:`, err);
          }
        } catch (err) {
          console.error("Error during GitLab repository tree retrieval:", err);
          throw err;
        }
      }
      else if (effectiveRepoInfo.type === 'bitbucket') {
        // Bitbucket API approach
        const repoPath = extractUrlPath(effectiveRepoInfo.repoUrl ?? '') ?? `${owner}/${repo}`;
        const encodedRepoPath = encodeURIComponent(repoPath);

        // File listing for the repository's main branch
        let filesData = null;
        let apiErrorDetails = '';
        let defaultBranchLocal = '';
        const headers = createBitbucketHeaders(currentToken);

        // First get project info to determine default branch
        const projectInfoUrl = `https://api.bitbucket.org/2.0/repositories/${encodedRepoPath}`;
        try {
          const response = await fetch(projectInfoUrl, { headers });

          const responseText = await response.text();

          if (response.ok) {
            const projectData = JSON.parse(responseText);
            defaultBranchLocal = projectData.mainbranch.name;
            // Store the default branch in state
            setDefaultBranch(defaultBranchLocal);

            const apiUrl = `https://api.bitbucket.org/2.0/repositories/${encodedRepoPath}/src/${defaultBranchLocal}/?recursive=true&per_page=100`;
            try {
              const structureResponse = await fetch(apiUrl, {
                headers
              });

              const structureResponseText = await structureResponse.text();

              if (structureResponse.ok) {
                filesData = JSON.parse(structureResponseText);
              } else {
                apiErrorDetails = `Status: ${structureResponse.status}, Response: ${structureResponseText}`;
              }
            } catch (err) {
              console.error(`Network error fetching Bitbucket branch ${defaultBranchLocal}:`, err);
            }
          } else {
            apiErrorDetails = `Status: ${response.status}, Response: ${responseText}`;
          }
        } catch (err) {
          // Also reached when the project info JSON cannot be parsed or has no
          // mainbranch; filesData stays null and the check below throws.
          console.error("Network error fetching Bitbucket project info:", err);
        }

        if (!filesData || !Array.isArray(filesData.values) || filesData.values.length === 0) {
          if (apiErrorDetails) {
            throw new Error(`Could not fetch repository structure. Bitbucket API Error: ${apiErrorDetails}`);
          } else {
            throw new Error('Could not fetch repository structure. Repository might not exist, be empty or private.');
          }
        }

        // Convert files data to a string representation (one file path per line)
        fileTreeData = filesData.values
          .filter((item: { type: string; path: string }) => item.type === 'commit_file')
          .map((item: { type: string; path: string }) => item.path)
          .join('\n');

        // Try to fetch README.md content
        try {
          const headers = createBitbucketHeaders(currentToken);

          const readmeResponse = await fetch(`https://api.bitbucket.org/2.0/repositories/${encodedRepoPath}/src/${defaultBranchLocal}/README.md`, {
            headers
          });

          if (readmeResponse.ok) {
            readmeContent = await readmeResponse.text();
          } else {
            console.warn(`Could not fetch Bitbucket README.md, status: ${readmeResponse.status}`);
          }
        } catch (err) {
          console.warn('Could not fetch Bitbucket README.md, continuing with empty README', err);
        }
      }

      // Now determine the wiki structure
      await determineWikiStructure(fileTreeData, readmeContent, owner, repo);

    } catch (error) {
      console.error('Error fetching repository structure:', error);
      setIsLoading(false);
      setError(error instanceof Error ? error.message : 'An unknown error occurred');
      setLoadingMessage(undefined);
    } finally {
      // Reset the request in progress flag
      setRequestInProgress(false);
    }
  }, [owner, repo, determineWikiStructure, currentToken, effectiveRepoInfo, requestInProgress, messages.loading]);

  // Export the current wiki through the /export/wiki endpoint and trigger a
  // browser download of the returned file.
  //
  // format: 'markdown' or 'json'; it is sent to the endpoint and also picks the
  //         extension of the fallback filename.
  //
  // Failures are reported through setExportError and are not rethrown.
  const exportWiki = useCallback(async (format: 'markdown' | 'json') => {
    // Nothing to do without a structure or without at least one generated page
    if (!wikiStructure || Object.keys(generatedPages).length === 0) {
      setExportError('No wiki content to export');
      return;
    }

    try {
      setIsExporting(true);
      setExportError(null);

      // Progress message, with a Japanese variant when language is 'ja'
      const isJapanese = language === 'ja';
      const messagePrefix = isJapanese ? 'Wikiを' : 'Exporting wiki as ';
      const messageSuffix = isJapanese ? 'としてエクスポート中...' : '...';
      setLoadingMessage(`${messagePrefix} ${format} ${messageSuffix}`);

      // Attach generated content to every page; pages without content get a placeholder
      const pagesToExport = wikiStructure.pages.map(page => ({
        ...page,
        content: generatedPages[page.id]?.content || 'Content not generated'
      }));

      // Ask the endpoint to build the export file
      const requestBody = JSON.stringify({
        repo_url: getRepoUrl(effectiveRepoInfo),
        type: effectiveRepoInfo.type,
        pages: pagesToExport,
        format
      });
      const response = await fetch(`/export/wiki`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: requestBody
      });

      if (!response.ok) {
        const errorText = await response.text().catch(() => 'No error details available');
        throw new Error(`Error exporting wiki: ${response.status} - ${errorText}`);
      }

      // Prefer the filename from the Content-Disposition header; otherwise build one from the repo name
      const extension = format === 'markdown' ? 'md' : 'json';
      let filename = `${effectiveRepoInfo.repo}_wiki.${extension}`;
      const headerMatch = response.headers.get('Content-Disposition')?.match(/filename=(.+)/);
      if (headerMatch?.[1]) {
        filename = headerMatch[1].replace(/"/g, '');
      }

      // Download the payload via a temporary anchor element
      const blob = await response.blob();
      const objectUrl = window.URL.createObjectURL(blob);
      const link = document.createElement('a');
      link.href = objectUrl;
      link.download = filename;
      document.body.appendChild(link);
      link.click();
      window.URL.revokeObjectURL(objectUrl);
      document.body.removeChild(link);

    } catch (err) {
      console.error('Error exporting wiki:', err);
      setExportError(err instanceof Error ? err.message : 'Unknown error during export');
    } finally {
      setIsExporting(false);
      setLoadingMessage(undefined);
    }
  }, [wikiStructure, generatedPages, effectiveRepoInfo, language]);

  // No longer needed as we use the modal directly

  // Force a fresh wiki generation: ask the server to delete its cached wiki,
  // drop the localStorage copy, and reset component state so the main
  // data-loading effect can start over.
  //
  // newToken: optional access token; when given it replaces the current token
  //           and is written to the `token` query parameter of the page URL.
  //
  // Abort paths (each leaves isLoading false and the loading message cleared):
  //   - an authorization code is required but missing -> error set, returns
  //   - the server answers 401                        -> error set, returns
  //   - the DELETE request itself throws              -> the error is rethrown
  // Any other non-OK response is only logged and the refresh continues.
  const confirmRefresh = useCallback(async (newToken?: string) => {
    setShowModelOptions(false);
    setLoadingMessage(messages.loading?.clearingCache || 'Clearing server cache...');
    setIsLoading(true); // Show loading indicator immediately

    try {
      // Validate before building or sending the request
      if (authRequired && !authCode) {
        setIsLoading(false);
        setLoadingMessage(undefined);
        console.error("Authorization code is required");
        setError('Authorization code is required');
        return;
      }

      const params = new URLSearchParams({
        owner: effectiveRepoInfo.owner,
        repo: effectiveRepoInfo.repo,
        repo_type: effectiveRepoInfo.type,
        language: language,
        provider: selectedProviderState,
        model: selectedModelState,
        is_custom_model: isCustomSelectedModelState.toString(),
        custom_model: customSelectedModelState,
        comprehensive: isComprehensiveView.toString(),
        authorization_code: authCode,
      });

      // Add file filters configuration
      if (modelExcludedDirs) {
        params.append('excluded_dirs', modelExcludedDirs);
      }
      if (modelExcludedFiles) {
        params.append('excluded_files', modelExcludedFiles);
      }

      const response = await fetch(`/api/wiki_cache?${params.toString()}`, {
        method: 'DELETE',
        headers: {
          'Accept': 'application/json',
        }
      });

      if (response.ok) {
        console.log('Server-side wiki cache cleared successfully.');
      } else {
        const errorText = await response.text();
        console.warn(`Failed to clear server-side wiki cache (status: ${response.status}): ${errorText}. Proceeding with refresh anyway.`);
        // A 401 is the one failure that stops the refresh
        if (response.status === 401) {
          setIsLoading(false);
          setLoadingMessage(undefined);
          setError('Failed to validate the authorization code');
          console.error('Failed to validate the authorization code')
          return;
        }
      }
    } catch (err) {
      console.warn('Error calling DELETE /api/wiki_cache:', err);
      setIsLoading(false);
      setLoadingMessage(undefined); // Don't leave the "clearing cache" message behind
      setEmbeddingError(false); // Reset embedding error state
      throw err;
    }

    // Update token if provided
    if (newToken) {
      // Update current token state
      setCurrentToken(newToken);
      // Update the URL parameters to include the new token
      const currentUrl = new URL(window.location.href);
      currentUrl.searchParams.set('token', newToken);
      window.history.replaceState({}, '', currentUrl.toString());
    }

    // Proceed with the rest of the refresh logic
    console.log('Refreshing wiki. Server cache will be overwritten upon new generation if not cleared.');

    // Clear the localStorage cache (if any remnants or if it was used before this change)
    const localStorageCacheKey = getCacheKey(effectiveRepoInfo.owner, effectiveRepoInfo.repo, effectiveRepoInfo.type, language, isComprehensiveView);
    localStorage.removeItem(localStorageCacheKey);

    // Reset cache loaded flag
    cacheLoadedSuccessfully.current = false;
    effectRan.current = false; // Allow the main data loading useEffect to run again

    // Reset all state
    setWikiStructure(undefined);
    setCurrentPageId(undefined);
    setGeneratedPages({});
    setPagesInProgress(new Set());
    setError(null);
    setEmbeddingError(false); // Reset embedding error state
    setIsLoading(true); // Set loading state for refresh
    setLoadingMessage(messages.loading?.initializing || 'Initializing wiki generation...');

    // Clear any in-progress requests for page content
    activeContentRequests.clear();
    // Reset flags related to request processing if they are component-wide
    setStructureRequestInProgress(false); // Assuming this flag should be reset
    setRequestInProgress(false); // Assuming this flag should be reset

    // fetchRepositoryStructure is deliberately not called here. With
    // effectRan.current back to false, this relies on the main data-loading
    // useEffect running again: it checks the server cache first and falls back
    // to fetching the repository structure.
  }, [effectiveRepoInfo, language, messages.loading, activeContentRequests, selectedProviderState, selectedModelState, isCustomSelectedModelState, customSelectedModelState, modelExcludedDirs, modelExcludedFiles, isComprehensiveView, authCode, authRequired]);

  // Start wiki generation when component mounts
  useEffect(() => {
    if (effectRan.current === false) {
      effectRan.current = true; // Set to true immediately to prevent re-entry due to StrictMode

      const loadData = async () => {
        // Try loading from server-side cache first
        setLoadingMessage(messages.loading?.fetchingCache || 'Checking for cached wiki...');
        try {
          const params = new URLSearchParams({
            owner: effectiveRepoInfo.owner,
            repo: effectiveRepoInfo.repo,
            repo_type: effectiveRepoInfo.type,
            language: language,
            comprehensive: isComprehensiveView.toString(),
          });
          const response = await fetch(`/api/wiki_cache?${params.toString()}`);

          if (response.ok) {
            const cachedData = await response.json(); // Returns null if no cache
            if (cachedData && cachedData.wiki_structure && cachedData.generated_pages && Object.keys(cachedData.generated_pages).length > 0) {
              console.log('Using server-cached wiki data');
              if(cachedData.model) {
                setSelectedModelState(cachedData.model);
              }
              if(cachedData.provider) {
                setSelectedProviderState(cachedData.provider);
              }

              // Update repoInfo
              if(cachedData.repo) {
                setEffectiveRepoInfo(cachedData.repo);
              } else if (cachedData.repo_url && !effectiveRepoInfo.repoUrl) {
                const updatedRepoInfo = { ...effectiveRepoInfo, repoUrl: cachedData.repo_url };
                setEffectiveRepoInfo(updatedRepoInfo); // Update effective repo info state
                console.log('Using cached repo_url:', cachedData.repo_url);
              }

              // Ensure the cached structure has sections and rootSections
              const cachedStructure = {
                ...cachedData.wiki_structure,
                sections: cachedData.wiki_structure.sections || [],
                rootSections: cachedData.wiki_structure.rootSections || []
              };

              // If sections or rootSections are missing, create intelligent ones based on page titles
              if (!cachedStructure.sections.length || !cachedStructure.rootSections.length) {
                const pages = cachedStructure.pages;
                const sections: WikiSection[] = [];
                const rootSections: string[] = [];

                // Group pages by common prefixes or categories
                const pageClusters = new Map<string, WikiPage[]>();

                // Define common categories that might appear in page titles
                const categories = [
                  { id: 'overview', title: 'Overview', keywords: ['overview', 'introduction', 'about'] },
                  { id: 'architecture', title: 'Architecture', keywords: ['architecture', 'structure', 'design', 'system'] },
                  { id: 'features', title: 'Core Features', keywords: ['feature', 'functionality', 'core'] },
                  { id: 'components', title: 'Components', keywords: ['component', 'module', 'widget'] },
                  { id: 'api', title: 'API', keywords: ['api', 'endpoint', 'service', 'server'] },
                  { id: 'data', title: 'Data Flow', keywords: ['data', 'flow', 'pipeline', 'storage'] },
                  { id: 'models', title: 'Models', keywords: ['model', 'ai', 'ml', 'integration'] },
                  { id: 'ui', title: 'User Interface', keywords: ['ui', 'interface', 'frontend', 'page'] },
                  { id: 'setup', title: 'Setup & Configuration', keywords: ['setup', 'config', 'installation', 'deploy'] }
                ];

                // Initialize clusters with empty arrays
                categories.forEach(category => {
                  pageClusters.set(category.id, []);
                });

                // Add an "Other" category for pages that don't match any category
                pageClusters.set('other', []);

                // Assign pages to categories based on title keywords
                pages.forEach((page: WikiPage) => {
                  const title = page.title.toLowerCase();
                  let assigned = false;

                  // Try to find a matching category
                  for (const category of categories) {
                    if (category.keywords.some(keyword => title.includes(keyword))) {
                      pageClusters.get(category.id)?.push(page);
                      assigned = true;
                      break;
                    }
                  }

                  // If no category matched, put in "Other"
                  if (!assigned) {
                    pageClusters.get('other')?.push(page);
                  }
                });

                // Create sections for non-empty categories
                for (const [categoryId, categoryPages] of pageClusters.entries()) {
                  if (categoryPages.length > 0) {
                    const category = categories.find(c => c.id === categoryId) ||
                                    { id: categoryId, title: categoryId === 'other' ? 'Other' : categoryId.charAt(0).toUpperCase() + categoryId.slice(1) };

                    const sectionId = `section-${categoryId}`;
                    sections.push({
                      id: sectionId,
                      title: category.title,
                      pages: categoryPages.map((p: WikiPage) => p.id)
                    });
                    rootSections.push(sectionId);

                    // Update page parentId
                    categoryPages.forEach((page: WikiPage) => {
                      page.parentId = sectionId;
                    });
                  }
                }

                // If we still have no sections (unlikely), fall back to importance-based grouping
                if (sections.length === 0) {
                  const highImportancePages = pages.filter((p: WikiPage) => p.importance === 'high').map((p: WikiPage) => p.id);
                  const mediumImportancePages = pages.filter((p: WikiPage) => p.importance === 'medium').map((p: WikiPage) => p.id);
                  const lowImportancePages = pages.filter((p: WikiPage) => p.importance === 'low').map((p: WikiPage) => p.id);

                  if (highImportancePages.length > 0) {
                    sections.push({
                      id: 'section-high',
                      title: 'Core Components',
                      pages: highImportancePages
                    });
                    rootSections.push('section-high');
                  }

                  if (mediumImportancePages.length > 0) {
                    sections.push({
                      id: 'section-medium',
                      title: 'Key Features',
                      pages: mediumImportancePages
                    });
                    rootSections.push('section-medium');
                  }

                  if (lowImportancePages.length > 0) {
                    sections.push({
                      id: 'section-low',
                      title: 'Additional Information',
                      pages: lowImportancePages
                    });
                    rootSections.push('section-low');
                  }
                }

                cachedStructure.sections = sections;
                cachedStructure.rootSections = rootSections;
              }

              setWikiStructure(cachedStructure);
              setGeneratedPages(cachedData.generated_pages);
              setCurrentPageId(cachedStructure.pages.length > 0 ? cachedStructure.pages[0].id : undefined);
              setIsLoading(false);
              setEmbeddingError(false); 
              setLoadingMessage(undefined);
              cacheLoadedSuccessfully.current = true;
              return; // Exit if cache is successfully loaded
            } else {
              console.log('No valid wiki data in server cache or cache is empty.');
            }
          } else {
            // Log error but proceed to fetch structure, as cache is optional
            console.error('Error fetching wiki cache from server:', response.status, await response.text());
          }
        } catch (error) {
          console.error('Error loading from server cache:', error);
          // Proceed to fetch structure if cache loading fails
        }

        // If we reached here, either there was no cache, it was invalid, or an error occurred
        // Proceed to fetch repository structure
        fetchRepositoryStructure();
      };

      loadData();

    } else {
      console.log('Skipping duplicate repository fetch/cache check');
    }

    // Clean up function for this effect is not strictly necessary for loadData,
    // but keeping the main unmount cleanup in the other useEffect
  }, [effectiveRepoInfo, effectiveRepoInfo.owner, effectiveRepoInfo.repo, effectiveRepoInfo.type, language, fetchRepositoryStructure, messages.loading?.fetchingCache, isComprehensiveView]);

  // Persist the finished wiki to the server-side cache through the Next.js
  // `/api/wiki_cache` route. The effect re-runs on every dependency change but
  // only issues the POST once loading has ended without an error, every page
  // in the structure has real content, and the data was not itself loaded
  // from the cache.
  useEffect(() => {
    const persistWiki = async () => {
      // Nothing to save while loading, after a failure, or without a structure.
      if (isLoading || error || !wikiStructure) return;

      // Require at least one generated page, and at least as many generated
      // entries as the structure lists.
      const generatedCount = Object.keys(generatedPages).length;
      if (generatedCount === 0 || generatedCount < wikiStructure.pages.length) return;

      // Data that was just read from the cache is not written back.
      if (cacheLoadedSuccessfully.current) return;

      // Skip while any page is missing, empty, or still showing the placeholder.
      const everyPageReady = wikiStructure.pages.every(page => {
        const generated = generatedPages[page.id];
        return generated && generated.content && generated.content !== 'Loading...';
      });
      if (!everyPageReady) return;

      console.log('Attempting to save wiki data to server cache via Next.js proxy');

      try {
        const payload = {
          repo: effectiveRepoInfo,
          language,
          comprehensive: isComprehensiveView,
          // Always send `sections` / `rootSections`, defaulting to empty arrays.
          wiki_structure: {
            ...wikiStructure,
            sections: wikiStructure.sections || [],
            rootSections: wikiStructure.rootSections || []
          },
          generated_pages: generatedPages,
          provider: selectedProviderState,
          model: selectedModelState
        };

        const response = await fetch('/api/wiki_cache', {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify(payload),
        });

        if (!response.ok) {
          console.error('Error saving wiki data to server cache:', response.status, await response.text());
          return;
        }
        console.log('Wiki data successfully saved to server cache');
      } catch (saveError) {
        // Caching is best-effort: report the failure and carry on.
        console.error('Error saving to server cache:', saveError);
      }
    };

    persistWiki();
  }, [isLoading, error, wikiStructure, generatedPages, effectiveRepoInfo.owner, effectiveRepoInfo.repo, effectiveRepoInfo.type, effectiveRepoInfo.repoUrl, repoUrl, language, isComprehensiveView]);

  // Switch the displayed wiki page. Selecting the page that is already
  // current leaves the state untouched.
  const handlePageSelect = (pageId: string) => {
    // Strict comparison: the ids are compared without any type coercion.
    if (currentPageId !== pageId) {
      setCurrentPageId(pageId);
    }
  };

  const [isModelSelectionModalOpen, setIsModelSelectionModalOpen] = useState(false);

  return (
    <div className="h-screen paper-texture p-4 md:p-8 flex flex-col">
      <style>{wikiStyles}</style>

      <header className="max-w-[90%] xl:max-w-[1400px] mx-auto mb-8 h-fit w-full">
        <div className="flex flex-col md:flex-row md:items-center md:justify-between gap-4">
          <div className="flex items-center gap-4">
            <Link href="/" className="text-[var(--accent-primary)] hover:text-[var(--highlight)] flex items-center gap-1.5 transition-colors border-b border-[var(--border-color)] hover:border-[var(--accent-primary)] pb-0.5">
              <FaHome /> {messages.repoPage?.home || 'Home'}
            </Link>
          </div>
        </div>
      </header>

      <main className="flex-1 max-w-[90%] xl:max-w-[1400px] mx-auto overflow-y-auto">
        {isLoading ? (
          <div className="flex flex-col items-center justify-center p-8 bg-[var(--card-bg)] rounded-lg shadow-custom card-japanese">
            <div className="relative mb-6">
              <div className="absolute -inset-4 bg-[var(--accent-primary)]/10 rounded-full blur-md animate-pulse"></div>
              <div className="relative flex items-center justify-center">
                <div className="w-3 h-3 bg-[var(--accent-primary)]/70 rounded-full animate-pulse"></div>
                <div className="w-3 h-3 bg-[var(--accent-primary)]/70 rounded-full animate-pulse delay-75 mx-2"></div>
                <div className="w-3 h-3 bg-[var(--accent-primary)]/70 rounded-full animate-pulse delay-150"></div>
              </div>
            </div>
            <p className="text-[var(--foreground)] text-center mb-3 font-serif">
              {loadingMessage || messages.common?.loading || 'Loading...'}
              {isExporting && (messages.loading?.preparingDownload || ' Please wait while we prepare your download...')}
            </p>

            {/* Progress bar for page generation */}
            {wikiStructure && (
              <div className="w-full max-w-md mt-3">
                <div className="bg-[var(--background)]/50 rounded-full h-2 mb-3 overflow-hidden border border-[var(--border-color)]">
                  <div
                    className="bg-[var(--accent-primary)] h-2 rounded-full transition-all duration-300 ease-in-out"
                    style={{
                      width: `${Math.max(5, 100 * (wikiStructure.pages.length - pagesInProgress.size) / wikiStructure.pages.length)}%`
                    }}
                  />
                </div>
                <p className="text-xs text-[var(--muted)] text-center">
                  {language === 'ja'
                    ? `${wikiStructure.pages.length}ページ中${wikiStructure.pages.length - pagesInProgress.size}ページ完了`
                    : messages.repoPage?.pagesCompleted
                        ? messages.repoPage.pagesCompleted
                            .replace('{completed}', (wikiStructure.pages.length - pagesInProgress.size).toString())
                            .replace('{total}', wikiStructure.pages.length.toString())
                        : `${wikiStructure.pages.length - pagesInProgress.size} of ${wikiStructure.pages.length} pages completed`}
                </p>

                {/* Show list of in-progress pages */}
                {pagesInProgress.size > 0 && (
                  <div className="mt-4 text-xs">
                    <p className="text-[var(--muted)] mb-2">
                      {messages.repoPage?.currentlyProcessing || 'Currently processing:'}
                    </p>
                    <ul className="text-[var(--foreground)] space-y-1">
                      {Array.from(pagesInProgress).slice(0, 3).map(pageId => {
                        const page = wikiStructure.pages.find(p => p.id === pageId);
                        return page ? <li key={pageId} className="truncate border-l-2 border-[var(--accent-primary)]/30 pl-2">{page.title}</li> : null;
                      })}
                      {pagesInProgress.size > 3 && (
                        <li className="text-[var(--muted)]">
                          {language === 'ja'
                            ? `...他に${pagesInProgress.size - 3}ページ`
                            : messages.repoPage?.andMorePages
                                ? messages.repoPage.andMorePages.replace('{count}', (pagesInProgress.size - 3).toString())
                                : `...and ${pagesInProgress.size - 3} more`}
                        </li>
                      )}
                    </ul>
                  </div>
                )}
              </div>
            )}
          </div>
        ) : error ? (
          <div className="bg-[var(--highlight)]/5 border border-[var(--highlight)]/30 rounded-lg p-5 mb-4 shadow-sm">
            <div className="flex items-center text-[var(--highlight)] mb-3">
              <FaExclamationTriangle className="mr-2" />
              <span className="font-bold font-serif">{messages.repoPage?.errorTitle || messages.common?.error || 'Error'}</span>
            </div>
            <p className="text-[var(--foreground)] text-sm mb-3">{error}</p>
            <p className="text-[var(--muted)] text-xs">
              {embeddingError ? (
                messages.repoPage?.embeddingErrorDefault || 'This error is related to the document embedding system used for analyzing your repository. Please verify your embedding model configuration, API keys, and try again. If the issue persists, consider switching to a different embedding provider in the model settings.'
              ) : (
                messages.repoPage?.errorMessageDefault || 'Please check that your repository exists and is public. Valid formats are "owner/repo", "https://github.com/owner/repo", "https://gitlab.com/owner/repo", "https://bitbucket.org/owner/repo", or local folder paths like "C:\\path\\to\\folder" or "/path/to/folder".'
              )}
            </p>
            <div className="mt-5">
              <Link
                href="/"
                className="btn-japanese px-5 py-2 inline-flex items-center gap-1.5"
              >
                <FaHome className="text-sm" />
                {messages.repoPage?.backToHome || 'Back to Home'}
              </Link>
            </div>
          </div>
        ) : wikiStructure ? (
          <div className="h-full overflow-y-auto flex flex-col lg:flex-row gap-4 w-full overflow-hidden bg-[var(--card-bg)] rounded-lg shadow-custom card-japanese">
            {/* Wiki Navigation */}
            <div className="h-full w-full lg:w-[280px] xl:w-[320px] flex-shrink-0 bg-[var(--background)]/50 rounded-lg rounded-r-none p-5 border-b lg:border-b-0 lg:border-r border-[var(--border-color)] overflow-y-auto">
              <h3 className="text-lg font-bold text-[var(--foreground)] mb-3 font-serif">{wikiStructure.title}</h3>
              <p className="text-[var(--muted)] text-sm mb-5 leading-relaxed">{wikiStructure.description}</p>

              {/* Display repository info */}
              <div className="text-xs text-[var(--muted)] mb-5 flex items-center">
                {effectiveRepoInfo.type === 'local' ? (
                  <div className="flex items-center">
                    <FaFolder className="mr-2" />
                    <span className="break-all">{effectiveRepoInfo.localPath}</span>
                  </div>
                ) : (
                  <>
                    {effectiveRepoInfo.type === 'github' ? (
                      <FaGithub className="mr-2" />
                    ) : effectiveRepoInfo.type === 'gitlab' ? (
                      <FaGitlab className="mr-2" />
                    ) : (
                      <FaBitbucket className="mr-2" />
                    )}
                    <a
                      href={effectiveRepoInfo.repoUrl ?? ''}
                      target="_blank"
                      rel="noopener noreferrer"
                      className="hover:text-[var(--accent-primary)] transition-colors border-b border-[var(--border-color)] hover:border-[var(--accent-primary)]"
                    >
                      {effectiveRepoInfo.owner}/{effectiveRepoInfo.repo}
                    </a>
                  </>
                )}
              </div>

              {/* Wiki Type Indicator */}
              <div className="mb-3 flex items-center text-xs text-[var(--muted)]">
                <span className="mr-2">Wiki Type:</span>
                <span className={`px-2 py-0.5 rounded-full ${isComprehensiveView
                  ? 'bg-[var(--accent-primary)]/10 text-[var(--accent-primary)] border border-[var(--accent-primary)]/30'
                  : 'bg-[var(--background)] text-[var(--foreground)] border border-[var(--border-color)]'}`}>
                  {isComprehensiveView
                    ? (messages.form?.comprehensive || 'Comprehensive')
                    : (messages.form?.concise || 'Concise')}
                </span>
              </div>

              {/* Refresh Wiki button */}
              <div className="mb-5">
                <button
                  onClick={() => setIsModelSelectionModalOpen(true)}
                  disabled={isLoading}
                  className="flex items-center w-full text-xs px-3 py-2 bg-[var(--background)] text-[var(--foreground)] rounded-md hover:bg-[var(--background)]/80 disabled:opacity-50 disabled:cursor-not-allowed border border-[var(--border-color)] transition-colors hover:cursor-pointer"
                >
                  <FaSync className={`mr-2 ${isLoading ? 'animate-spin' : ''}`} />
                  {messages.repoPage?.refreshWiki || 'Refresh Wiki'}
                </button>
              </div>

              {/* Export buttons */}
              {Object.keys(generatedPages).length > 0 && (
                <div className="mb-5">
                  <h4 className="text-sm font-semibold text-[var(--foreground)] mb-3 font-serif">
                    {messages.repoPage?.exportWiki || 'Export Wiki'}
                  </h4>
                  <div className="flex flex-col gap-2">
                    <button
                      onClick={() => exportWiki('markdown')}
                      disabled={isExporting}
                      className="btn-japanese flex items-center text-xs px-3 py-2 rounded-md disabled:opacity-50 disabled:cursor-not-allowed"
                    >
                      <FaDownload className="mr-2" />
                      {messages.repoPage?.exportAsMarkdown || 'Export as Markdown'}
                    </button>
                    <button
                      onClick={() => exportWiki('json')}
                      disabled={isExporting}
                      className="flex items-center text-xs px-3 py-2 bg-[var(--background)] text-[var(--foreground)] rounded-md hover:bg-[var(--background)]/80 disabled:opacity-50 disabled:cursor-not-allowed border border-[var(--border-color)] transition-colors"
                    >
                      <FaFileExport className="mr-2" />
                      {messages.repoPage?.exportAsJson || 'Export as JSON'}
                    </button>
                  </div>
                  {exportError && (
                    <div className="mt-2 text-xs text-[var(--highlight)]">
                      {exportError}
                    </div>
                  )}
                </div>
              )}

              <h4 className="text-md font-semibold text-[var(--foreground)] mb-3 font-serif">
                {messages.repoPage?.pages || 'Pages'}
              </h4>
              <WikiTreeView
                wikiStructure={wikiStructure}
                currentPageId={currentPageId}
                onPageSelect={handlePageSelect}
                messages={messages.repoPage}
              />
            </div>

            {/* Wiki Content */}
            <div id="wiki-content" className="w-full flex-grow p-6 lg:p-8 overflow-y-auto">
              {currentPageId && generatedPages[currentPageId] ? (
                <div className="max-w-[900px] xl:max-w-[1000px] mx-auto">
                  <h3 className="text-xl font-bold text-[var(--foreground)] mb-4 break-words font-serif">
                    {generatedPages[currentPageId].title}
                  </h3>


                  <div className="prose prose-sm md:prose-base lg:prose-lg max-w-none">
                    <Markdown
                      content={generatedPages[currentPageId].content}
                    />
                  </div>

                  {generatedPages[currentPageId].relatedPages.length > 0 && (
                    <div className="mt-8 pt-4 border-t border-[var(--border-color)]">
                      <h4 className="text-sm font-semibold text-[var(--muted)] mb-3">
                        {messages.repoPage?.relatedPages || 'Related Pages:'}
                      </h4>
                      <div className="flex flex-wrap gap-2">
                        {generatedPages[currentPageId].relatedPages.map(relatedId => {
                          const relatedPage = wikiStructure.pages.find(p => p.id === relatedId);
                          return relatedPage ? (
                            <button
                              key={relatedId}
                              className="bg-[var(--accent-primary)]/10 hover:bg-[var(--accent-primary)]/20 text-xs text-[var(--accent-primary)] px-3 py-1.5 rounded-md transition-colors truncate max-w-full border border-[var(--accent-primary)]/20"
                              onClick={() => handlePageSelect(relatedId)}
                            >
                              {relatedPage.title}
                            </button>
                          ) : null;
                        })}
                      </div>
                    </div>
                  )}
                </div>
              ) : (
                <div className="flex flex-col items-center justify-center p-8 text-[var(--muted)] h-full">
                  <div className="relative mb-4">
                    <div className="absolute -inset-2 bg-[var(--accent-primary)]/5 rounded-full blur-md"></div>
                    <FaBookOpen className="text-4xl relative z-10" />
                  </div>
                  <p className="font-serif">
                    {messages.repoPage?.selectPagePrompt || 'Select a page from the navigation to view its content'}
                  </p>
                </div>
              )}
            </div>
          </div>
        ) : null}
      </main>

      <footer className="max-w-[90%] xl:max-w-[1400px] mx-auto mt-8 flex flex-col gap-4 w-full">
        <div className="flex justify-between items-center gap-4 text-center text-[var(--muted)] text-sm h-fit w-full bg-[var(--card-bg)] rounded-lg p-3 shadow-sm border border-[var(--border-color)]">
          <p className="flex-1 font-serif">
            {messages.footer?.copyright || 'DeepWiki - Generate Wiki from GitHub/Gitlab/Bitbucket repositories'}
          </p>
          <ThemeToggle />
        </div>
      </footer>

      {/* Floating Chat Button */}
      {!isLoading && wikiStructure && (
        <button
          onClick={() => setIsAskModalOpen(true)}
          className="fixed bottom-6 right-6 w-14 h-14 rounded-full bg-[var(--accent-primary)] text-white shadow-lg flex items-center justify-center hover:bg-[var(--accent-primary)]/90 transition-all z-50"
          aria-label={messages.ask?.title || 'Ask about this repository'}
        >
          <FaComments className="text-xl" />
        </button>
      )}

      {/* Ask Modal - Always render but conditionally show/hide */}
      <div className={`fixed inset-0 bg-black/50 flex items-center justify-center z-50 p-4 transition-opacity duration-300 ${isAskModalOpen ? 'opacity-100' : 'opacity-0 pointer-events-none'}`}>
        <div className="bg-[var(--card-bg)] rounded-lg shadow-xl w-full max-w-3xl max-h-[80vh] flex flex-col">
          <div className="flex items-center justify-end p-3 absolute top-0 right-0 z-10">
            <button
              onClick={() => {
                // Just close the modal without clearing the conversation
                setIsAskModalOpen(false);
              }}
              className="text-[var(--muted)] hover:text-[var(--foreground)] transition-colors bg-[var(--card-bg)]/80 rounded-full p-2"
              aria-label="Close"
            >
              <FaTimes className="text-xl" />
            </button>
          </div>
          <div className="flex-1 overflow-y-auto p-4">
            <Ask
              repoInfo={effectiveRepoInfo}
              provider={selectedProviderState}
              model={selectedModelState}
              isCustomModel={isCustomSelectedModelState}
              customModel={customSelectedModelState}
              language={language}
              onRef={(ref) => (askComponentRef.current = ref)}
            />
          </div>
        </div>
      </div>

      <ModelSelectionModal
        isOpen={isModelSelectionModalOpen}
        onClose={() => setIsModelSelectionModalOpen(false)}
        provider={selectedProviderState}
        setProvider={setSelectedProviderState}
        model={selectedModelState}
        setModel={setSelectedModelState}
        isCustomModel={isCustomSelectedModelState}
        setIsCustomModel={setIsCustomSelectedModelState}
        customModel={customSelectedModelState}
        setCustomModel={setCustomSelectedModelState}
        isComprehensiveView={isComprehensiveView}
        setIsComprehensiveView={setIsComprehensiveView}
        showFileFilters={true}
        excludedDirs={modelExcludedDirs}
        setExcludedDirs={setModelExcludedDirs}
        excludedFiles={modelExcludedFiles}
        setExcludedFiles={setModelExcludedFiles}
        includedDirs={modelIncludedDirs}
        setIncludedDirs={setModelIncludedDirs}
        includedFiles={modelIncludedFiles}
        setIncludedFiles={setModelIncludedFiles}
        onApply={confirmRefresh}
        showWikiType={true}
        showTokenInput={effectiveRepoInfo.type !== 'local' && !currentToken} // Show token input if not local and no current token
        repositoryType={effectiveRepoInfo.type as 'github' | 'gitlab' | 'bitbucket'}
        authRequired={authRequired}
        authCode={authCode}
        setAuthCode={setAuthCode}
        isAuthLoading={isAuthLoading}
      />
    </div>
  );
}


================================================
FILE: src/app/[owner]/[repo]/slides/page.tsx
================================================
'use client';

import React, { useCallback, useState, useEffect, useRef, useMemo } from 'react';
import { useParams, useSearchParams } from 'next/navigation';
import Link from 'next/link';
import { FaArrowLeft, FaSync, FaDownload, FaArrowRight, FaArrowUp, FaTimes } from 'react-icons/fa';
import ThemeToggle from '@/components/theme-toggle';
import { useLanguage } from '@/contexts/LanguageContext';
import { RepoInfo } from '@/types/repoinfo';
import getRepoUrl from '@/utils/getRepoUrl';

// Fills a chat request body, in place, with the access token and the
// model-selection fields:
//   - `token` is written only when it is not the empty string;
//   - `provider`, `model` and `language` are always written, even when empty;
//   - `custom_model` is written only when `isCustomModel` is set and
//     `customModel` is non-empty.
// `repoType` is accepted but not read by this function.
const addTokensToRequestBody = (
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  requestBody: Record<string, any>,
  token: string,
  repoType: string,
  provider: string = '',
  model: string = '',
  isCustomModel: boolean = false,
  customModel: string = '',
  language: string = 'en',
) => {
  const hasToken = token !== '';
  if (hasToken) {
    requestBody.token = token;
  }

  // Provider-based model selection (provider first, then model).
  Object.assign(requestBody, { provider, model });

  const wantsCustomModel = isCustomModel && customModel;
  if (wantsCustomModel) {
    requestBody.custom_model = customModel;
  }

  requestBody.language = language;
};

// One slide of the generated presentation; the page keeps these in its
// `slides` state array.
interface Slide {
  id: string;
  title: string;
  // NOTE(review): presumably the slide's source text — confirm where slides are built.
  content: string;
  // NOTE(review): presumably the markup used to display the slide — confirm where slides are built.
  html: string;
}

export default function SlidesPage() {
  // Get route parameters and search params
  const params = useParams();
  const searchParams = useSearchParams();

  // Extract owner and repo from route params
  const owner = params.owner as string;
  const repo = params.repo as string;

  // Extract tokens from search params
  const token = searchParams.get('token') || '';
  const repoType = searchParams.get('type') || 'github';
  const localPath = searchParams.get('local_path') ? decodeURIComponent(searchParams.get('local_path') || '') : undefined;
  const repoUrl = searchParams.get('repo_url') ? decodeURIComponent(searchParams.get('repo_url') || '') : undefined;
  const providerParam = searchParams.get('provider') || '';
  const modelParam = searchParams.get('model') || '';
  const isCustomModelParam = searchParams.get('is_custom_model') === 'true';
  const customModelParam = searchParams.get('custom_model') || '';
  const language = searchParams.get('language') || 'en';

  // Import language context for translations
  const { messages } = useLanguage();

  // Initialize repo info with useMemo to prevent unnecessary re-renders
  const repoInfo = useMemo<RepoInfo>(() => ({
    owner,
    repo,
    type: repoType,
    token: token || null,
    localPath: localPath || null,
    repoUrl: repoUrl || null
  }), [owner, repo, repoType, token, localPath, repoUrl]);

  // State variables
  const [isLoading, setIsLoading] = useState(false);
  const [loadingMessage, setLoadingMessage] = useState<string | undefined>(
    messages.loading?.initializing || 'Initializing slides generation...'
  );
  const [error, setError] = useState<string | null>(null);
  const [slides, setSlides] = useState<Slide[]>([]);
  const [currentSlideIndex, setCurrentSlideIndex] = useState(0);
  const [isExporting, setIsExporting] = useState(false);
  const [exportError, setExportError] = useState<string | null>(null);
  const [isFullscreen, setIsFullscreen] = useState(false);

  // Local shapes describing the cached wiki payload that this page reads
  // when assembling the text sent for slide generation.
  // One wiki page entry; also used as the value type of `generated_pages`.
  interface WikiPage {
    id: string;
    title: string;
    content: string;
    // Compared against 'high' to decide which pages are added first.
    importance: string;
    filePaths: string[];
    relatedPages: string[];
  }

  // A named group of pages.
  // NOTE(review): `pages` / `subsections` presumably hold page and section ids — confirm against the cache writer.
  interface WikiSection {
    id: string;
    title: string;
    pages: string[];
    subsections: string[];
  }

  // The wiki outline: its description, page list and section grouping.
  interface WikiStructure {
    description: string;
    pages: WikiPage[];
    sections: WikiSection[];
    rootSections: string[];
  }

  // Payload stored in `cachedWikiContent`; `generated_pages` is indexed by page id.
  interface WikiCacheData {
    wiki_structure: WikiStructure;
    generated_pages: Record<string, WikiPage>;
  }

  const [cachedWikiContent, setCachedWikiContent] = useState<WikiCacheData | null>(null);

  // Load the previously generated wiki for this repo/language from the
  // server-side cache. On success the payload is stored in
  // `cachedWikiContent` and returned; every other outcome (non-OK response,
  // empty or incomplete payload, thrown error) is logged and yields null.
  const fetchCachedWikiContent = useCallback(async () => {
    try {
      const query = new URLSearchParams({
        owner: repoInfo.owner,
        repo: repoInfo.repo,
        repo_type: repoInfo.type,
        language,
      });
      const response = await fetch('/api/wiki_cache?' + query.toString());

      if (!response.ok) {
        console.error('Error fetching wiki cache from server:', response.status);
        return null;
      }

      const cachedData = await response.json();

      // Usable only when both parts exist and at least one page was generated.
      const isUsable =
        cachedData &&
        cachedData.wiki_structure &&
        cachedData.generated_pages &&
        Object.keys(cachedData.generated_pages).length > 0;

      if (!isUsable) {
        console.log('No valid wiki data in server cache or cache is empty.');
        return null;
      }

      console.log('Successfully fetched cached wiki data for slides generation');
      setCachedWikiContent(cachedData);
      return cachedData;
    } catch (cacheError) {
      console.error('Error loading from server cache:', cacheError);
      return null;
    }
  }, [repoInfo.owner, repoInfo.repo, repoInfo.type, language]);

  // Generate slides content
  const generateSlidesContent = useCallback(async () => {
    if (isLoading) return;

    setIsLoading(true);
    setError(null);
    // Clear previous content
    setSlides([]);
    setCurrentSlideIndex(0);
    setLoadingMessage(messages.loading?.generatingSlides || 'Generating slides...');

    try {
      // Get repository URL
      const repoUrl = getRepoUrl(repoInfo);

      // Fetch cached wiki content if not already available
      let wikiData = cachedWikiContent;
      if (!wikiData) {
        wikiData = await fetchCachedWikiContent();
      }

      // We'll just pass the entire wiki data to the LLM without complex processing
      let wikiContent = '';

      if (wikiData && wikiData.wiki_structure && wikiData.generated_pages) {
        // Add the wiki structure description
        wikiContent += `## Project Overview\n${wikiData.wiki_structure.description || ''}\n\n`;

        // Add all wiki pages content
        const pages = wikiData.wiki_structure.pages || [];
        const generatedPages = wikiData.generated_pages || {};

        // Limit the total content to avoid token limits
        let totalContentLength = 0;
        const maxContentLength = 30000; // Approximate limit to avoid token issues

        // First add high importance pages
        const highImportancePages = pages.filter(page => page.importance === 'high');
        for (const page of highImportancePages) {
          if (generatedPages[page.id] && generatedPages[page.id].content) {
            const content = `## ${page.title}\n${generatedPages[page.id].content}\n\n`;
            wikiContent += content;
            totalContentLength += content.length;

            if (totalContentLength > maxContentLength) break;
          }
        }

        // Then add other pages if we still have space
        if (totalContentLength < maxContentLength) {
          for (const page of pages) {
            // Skip high importance pages we've already added
            if (page.importance === 'high') continue;

            if (generatedPages[page.id] && generatedPages[page.id].content) {
              const content = `## ${page.title}\n${generatedPages[page.id].content}\n\n`;

              // Check if adding this content would exceed our limit
              if (totalContentLength + content.length > maxContentLength) {
                // If it would exceed, just add a summary
                const summaryMatch = generatedPages[page.id].content.match(/# .*?\n\n(.*?)(\n\n|$)/);
                const summary = summaryMatch ? summaryMatch[1].trim() : 'No summary available';
                const summaryContent = `## ${page.title}\n${summary}\n\n`;

                wikiContent += summaryContent;
                totalContentLength += summaryContent.length;
              } else {
                // Otherwise add the full content
                wikiContent += content;
                totalContentLength += content.length;
              }

              if (totalContentLength > maxContentLength) break;
            }
          }
        }
      }

      // First, get a plan for the slides
      const planRequestBody: Record<string, unknown> = {
        repo_url: repoUrl,
        type: repoInfo.type,
        messages: [{
          role: 'user',
          content: `Create an engaging outline for a high-quality marketing slide presentation about the ${owner}/${repo} repository.

Based on this wiki content:
${wikiContent}

I need a numbered list of 7-8 creative slide titles with brief descriptions for a professional marketing presentation. Think of this as a pitch deck that would impress potential users or investors.

Focus on:
- Compelling value propositions
- Unique selling points
- Impressive features and capabilities
- Real-world applications and benefits
- Visually interesting concepts that can be represented creatively

For example, instead of generic titles like "Introduction" or "Features", use more engaging titles like:
1. "Revolutionizing Development with ${repo}"
2. "Unlock Powerful Capabilities with Our Innovative Architecture"
3. "How ${repo} Transforms Your Workflow"

Give me the numbered list with brief descriptions for each slide. Be creative but professional.`
        }]
      };

      // Add tokens if available
      addTokensToRequestBody(planRequestBody, token, repoInfo.type, providerParam, modelParam, isCustomModelParam, customModelParam, language);

      // Use WebSocket for communication
      let planContent = '';

      try {
        // Create WebSocket URL from the server base URL
        const serverBaseUrl = process.env.SERVER_BASE_URL || 'http://localhost:8001';
        const wsBaseUrl = serverBaseUrl.replace(/^http/, 'ws')? serverBaseUrl.replace(/^https/, 'wss'): serverBaseUrl.replace(/^http/, 'ws');
        const wsUrl = `${wsBaseUrl}/ws/chat`;

        // Create a new WebSocket connection
        const ws = new WebSocket(wsUrl);

        // Create a single promise that handles the entire WebSocket lifecycle
        await new Promise<void>((resolve, reject) => {
          let isResolved = false;

          // If the connection doesn't open or complete within 10 seconds, fall back to HTTP
          const timeout = setTimeout(() => {
            if (!isResolved) {
              isResolved = true;
              // Try to close the WebSocket if it's still open
              if (ws.readyState === WebSocket.OPEN) {
                ws.close();
              }
              reject(new Error('WebSocket connection timeout'));
            }
          }, 10000);

          // Set up event handlers
          ws.onopen = () => {
            console.log('WebSocket connection established for slide plan');
            // Send the request as JSON
            ws.send(JSON.stringify(planRequestBody));
            // Don't resolve here, wait for the complete response
          };

          ws.onmessage = (event) => {
            const chunk = event.data;
            planContent += chunk;
          };

          ws.onclose = () => {
            clearTimeout(timeout);
            console.log('WebSocket connection closed for slide plan');
            if (!isResolved) {
              isResolved = true;
              resolve();
            }
          };

          ws.onerror = (error) => {
            console.error('WebSocket error:', error);
            if (!isResolved) {
              isResolved = true;
              reject(new Error('WebSocket connection failed'));
            }
          };
        });
      } catch (wsError) {
        console.error('WebSocket error, falling back to HTTP:', wsError);

        // Fall back to HTTP if WebSocket fails
        const planResponse = await fetch(`/api/chat/stream`, {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
          },
          body: JSON.stringify(planRequestBody)
        });

        if (!planResponse.ok) {
          throw new Error(`Error generating slide plan: ${planResponse.status}`);
        }

        // Process the plan response
        planContent = '';
        const planReader = planResponse.body?.getReader();
        const planDecoder = new TextDecoder();

        if (!planReader) {
          throw new Error('Failed to get plan response reader');
        }

        try {
          while (true) {
            const { done, value } = await planReader.read();
            if (done) break;
            const chunk = planDecoder.decode(value, { stream: true });
            planContent += chunk;
          }
          // Ensure final decoding
          const finalChunk = planDecoder.decode();
          planContent += finalChunk;
        } catch (readError) {
          console.error('Error reading plan stream:', readError);
          throw new Error('Error processing plan response stream');
        }
      }

      // Log the plan content for debugging
      console.log("Received slide plan:", planContent);

      // Try multiple regex patterns to extract slide plan
      let slideMatches: RegExpExecArray[] = [];

      // Pattern 1: Standard numbered list with periods (1. Title: Description)
      const pattern1 = /\d+\.\s+(.*?)(?=\n\d+\.|\n*$)/g;
      let match;
      while ((match = pattern1.exec(planContent)) !== null) {
        slideMatches.push(match);
      }

      // Pattern 2: Numbered list with parentheses (1) Title: Description
      if (slideMatches.length === 0) {
        const pattern2 = /\d+\)\s+(.*?)(?=\n\d+\)|\n*$)/g;
        while ((match = pattern2.exec(planContent)) !== null) {
          slideMatches.push(match);
        }
      }

      // Pattern 3: Look for lines with "Slide" followed by number
      if (slideMatches.length === 0) {
        const pattern3 = /Slide\s+\d+\s*:?\s*(.*?)(?=\nSlide|\n*$)/gi;
        while ((match = pattern3.exec(planContent)) !== null) {
          slideMatches.push(match);
        }
      }

      // Pattern 4: Look for any lines with a title that might be a slide
      if (slideMatches.length === 0) {
        const pattern4 = /^([^:\n]+)(?::\s*(.*?))?$/gm;
        while ((match = pattern4.exec(planContent)) !== null) {
          // Filter out very short lines or lines that look like instructions
          if (match[1].length > 3 && !match[1].toLowerCase().includes("please") && !match[1].toLowerCase().includes("here")) {
            slideMatches.push(match);
          }
        }
      }

      // If we still don't have matches, create some default slides
      if (slideMatches.length === 0) {
        console.warn("Could not extract slide plan from response, using default slides");

        // Create default slides
        const defaultSlides = [
          `Title Slide: Introduction to ${repo}`,
          `Overview: Key features and purpose of ${repo}`,
          `Architecture: System components and structure`,
          `Features: Main capabilities and functionalities`,
          `Implementation: How it works and technical details`,
          `Use Cases: How to use ${repo} effectively`,
          `Conclusion: Summary and next steps`
        ];

        // Convert to match format
        slideMatches = defaultSlides.map((slide, index) => {
          const mockMatch = ["", slide] as unknown as RegExpExecArray;
          mockMatch.index = index;
          mockMatch.input = slide;
          return mockMatch;
        });
      }

      console.log(`Found ${slideMatches.length} slides in the plan`);


      // Now generate each slide one by one
      const generatedSlides: Slide[] = [];
      let slideCounter = 1;

      for (const slideMatch of slideMatches) {
        const slideTitle = slideMatch[1].split(':')[0].trim();
        const slideDescription = slideMatch[1].includes(':') ? slideMatch[1].split(':')[1].trim() : '';

        setLoadingMessage(`Generating slide ${slideCounter} of ${slideMatches.length}: ${slideTitle}`);

        // Create a request for this specific slide
        const slideRequestBody: Record<string, unknown> = {
          repo_url: repoUrl,
          type: repoInfo.type,
          messages: [{
            role: 'user',
            content: `Create a single HTML slide about the ${owner}/${repo} repository with the title "${slideTitle}".

This is slide ${slideCounter} of ${slideMatches.length} in the presentation.
${slideDescription ? `The slide should cover: ${slideDescription}` : ''}

Use the following wiki content as reference:
${wikiContent}

I need ONLY the HTML for this slide. The slide should maintain a consistent dark theme with gradients and professional styling, but BE CREATIVE with the content and layout.

IMPORTANT LAYOUT REQUIREMENTS:
1. The slide MUST be designed for a 16:9 HORIZONTAL layout (landscape orientation)
2. All content MUST fit within the visible area without requiring scrolling
3. Text must be properly sized and positioned for readability in a presentation context
4. Content should be well-structured with clear visual hierarchy
5. Use grid or flexbox layouts to ensure proper horizontal organization of content
6. Limit text content to what can be comfortably read from a distance

MARKETING QUALITY:
Create a genuinely high-quality marketing slide that would impress potential users or investors. Use compelling language, impactful visuals, and professional marketing techniques. Think of this as a slide for a professional pitch deck or product showcase.

You can use:
- Two or three-column layouts for better horizontal space utilization
- Engaging marketing copy with concise bullet points (no more than 4-5 per slide)
- Visual metaphors and analogies positioned to the side of text content
- Charts, diagrams, or code snippets when relevant (positioned appropriately)
- Icons from Font Awesome (already included)
- Creative use of gradients, shadows, and visual elements

The slide should maintain the dark theme aesthetic but can be uniquely designed. Use creative HTML/CSS to make the slide visually impressive while ensuring all content fits properly in the horizontal layout.

Here's a basic structure to build upon (but feel free to be creative):

<div class="slide">
    <div class="code-pattern"></div>
    <div class="accent-glow"></div>

    <div class="content">
        <!-- Use horizontal layout structures -->
        <div class="slide-header">
            <h1 class="main-title">${slideTitle}</h1>
        </div>

        <div class="slide-body">
            <!-- Consider using flex or grid layout here -->
            <div class="left-column">
                <!-- Main points or text content -->
            </div>
            <div class="right-column">
                <!-- Visual elements, diagrams, or supporting content -->
            </div>
        </div>
    </div>
</div>
<style>
    /* Base styling with horizontal layout focus */
    .slide {
        width: 100%;
        height: 100%;
        background: linear-gradient(135deg, #0d1117 0%, #161b22 100%);
        color: #e6edf3;
        display: flex;
        flex-direction: column;
        overflow: hidden;
    }
    .content {
        display: flex;
        flex-direction: column;
        height: 100%;
        padding: 40px 60px;
        z-index: 2;
    }
    .slide-header {
        margin-bottom: 30px;
    }
    .slide-body {
        display: flex;
        flex: 1;
        gap: 40px;
    }
    .left-column, .right-column {
        flex: 1;
        display: flex;
        flex-direction: column;
    }
</style>

Please return ONLY the HTML with no markdown formatting or code blocks. Just the raw HTML for the slide.`
          }]
        };

        // Add tokens if available
        addTokensToRequestBody(slideRequestBody, token, repoInfo.type, providerParam, modelParam, isCustomModelParam, customModelParam, language);

        // Use WebSocket for communication
        let slideContent = '';

        try {
          // Create WebSocket URL from the server base URL
          const serverBaseUrl = process.env.SERVER_BASE_URL || 'http://localhost:8001';
          const wsBaseUrl = serverBaseUrl.replace(/^http/, 'ws')? serverBaseUrl.replace(/^https/, 'wss'): serverBaseUrl.replace(/^http/, 'ws');
          const wsUrl = `${wsBaseUrl}/ws/chat`;

          // Create a new WebSocket connection
          const ws = new WebSocket(wsUrl);

          // Create a single promise that handles the entire WebSocket lifecycle
          await new Promise<void>((resolve, reject) => {
            let isResolved = false;

            // If the connection doesn't open or complete within 10 seconds, fall back to HTTP
            const timeout = setTimeout(() => {
              if (!isResolved) {
                isResolved = true;
                // Try to close the WebSocket if it's still open
                if (ws.readyState === WebSocket.OPEN) {
                  ws.close();
                }
                reject(new Error('WebSocket connection timeout'));
              }
            }, 10000);

            // Set up event handlers
            ws.onopen = () => {
              console.log(`WebSocket connection established for slide ${slideCounter}`);
              // Send the request as JSON
              ws.send(JSON.stringify(slideRequestBody));
              // Don't resolve here, wait for the complete response
            };

            ws.onmessage = (event) => {
              const chunk = event.data;
              slideContent += chunk;
            };

            ws.onclose = () => {
              clearTimeout(timeout);
              console.log(`WebSocket connection closed for slide ${slideCounter}`);
              if (!isResolved) {
                isResolved = true;
                resolve();
              }
            };

            ws.onerror = (error) => {
              console.error('WebSocket error:', error);
              if (!isResolved) {
                isResolved = true;
                reject(new Error('WebSocket connection failed'));
              }
            };
          });
        } catch (wsError) {
          console.error('WebSocket error, falling back to HTTP:', wsError);

          // Fall back to HTTP if WebSocket fails
          const slideResponse = await fetch(`/api/chat/stream`, {
            method: 'POST',
            headers: {
              'Content-Type': 'application/json',
            },
            body: JSON.stringify(slideRequestBody)
          });

          if (!slideResponse.ok) {
            throw new Error(`Error generating slide ${slideCounter}: ${slideResponse.status}`);
          }

          // Process the slide response
          slideContent = '';
          const slideReader = slideResponse.body?.getReader();
          const slideDecoder = new TextDecoder();

          if (!slideReader) {
            throw new Error(`Failed to get reader for slide ${slideCounter}`);
          }

          try {
            while (true) {
              const { done, value } = await slideReader.read();
              if (done) break;
              const chunk = slideDecoder.decode(value, { stream: true });
              slideContent += chunk;
            }
            // Ensure final decoding
            const finalChunk = slideDecoder.decode();
            slideContent += finalChunk;
          } catch (readError) {
            console.error(`Error reading slide ${slideCounter} stream:`, readError);
            throw new Error(`Error processing slide ${slideCounter} response stream`);
          }
        }

        // Extract HTML content - look for content between HTML tags or code blocks
        let slideHtml = '';

        console.log(`Processing slide ${slideCounter} response`);

        // Try to extract from code blocks if present
        const codeBlockMatch = slideContent.match(/```(?:html)?\s*([\s\S]*?)\s*```/);
        if (codeBlockMatch) {
          slideHtml = codeBlockMatch[1];
          console.log("Extracted HTML from code block");
        }
        // Try to extract content between <div class="slide"> and closing </div>
        else if (slideContent.includes('<div class="slide"')) {
          const divMatch = slideContent.match(/<div class="slide"[\s\S]*?<\/div>\s*<\/div>/);
          if (divMatch) {
            slideHtml = divMatch[0];
            console.log("Extracted HTML from div tags");
          }
        }
        // Try to extract any HTML-like content
        else if (slideContent.includes('<') && slideContent.includes('>')) {
          const htmlTagMatch = slideContent.match(/<[\s\S]*?>/);
          if (htmlTagMatch) {
            // Find the first HTML tag
            const firstTag = htmlTagMatch[0].match(/<([a-z][a-z0-9]*)/i);
            if (firstTag && firstTag[1]) {
              const tagName = firstTag[1];
              // Try to extract everything from this opening tag to its closing tag
              const fullTagRegex = new RegExp(`<${tagName}[\\s\\S]*?<\\/${tagName}>`, 'i');
              const fullTagMatch = slideContent.match(fullTagRegex);
              if (fullTagMatch) {
                slideHtml = fullTagMatch[0];
                console.log(`Extracted HTML using tag matching for ${tagName}`);
              }
            }
          }
        }

        // If we still don't have HTML, use the raw content
        if (!slideHtml) {
          console.log("Using raw content as HTML");
          slideHtml = slideContent;
        }

        // Add default styling if not present
        if (!slideHtml.includes('<style>') && !slideHtml.includes('<link rel="stylesheet"')) {
          slideHtml = `
<div class="slide">
    <div class="code-pattern"></div>
    <div class="accent-glow"></div>

    <div class="content">
        <div class="slide-header">
            <h1 class="main-title">${slideTitle}</h1>
        </div>

        <div class="slide-body">
            <div class="left-column">
                <div class="slide-content">
                    ${slideHtml}
                </div>
            </div>
            <div class="right-column">
                <!-- The AI will likely provide content for both columns, but if not, this ensures proper layout -->
                <div class="visual-content">
                    <i class="fas fa-code fa-5x" style="opacity: 0.3; color: #58a6ff; margin: 2rem auto; display: block; text-align: center;"></i>
                </div>
            </div>
        </div>
    </div>
</div>
<style>
    /* Base slide styling - optimized for horizontal layout */
    .slide {
        width: 100%;
        height: 100%;
        position: relative;
        overflow: hidden;
        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
        color: #e6edf3;
        background: linear-gradient(135deg, #0d1117 0%, #161b22 100%);
        display: flex;
        flex-direction: column;
    }

    /* Optional decorative elements that can be used or overridden */
    .code-pattern {
        position: absolute;
        width: 100%;
        height: 100%;
        background-image: url("data:image/svg+xml,%3Csvg width='60' height='60' viewBox='0 0 60 60' xmlns='http://www.w3.org/2000/svg'%3E%3Cg fill='none' fill-rule='evenodd'%3E%3Cg fill='%2330363d' fill-opacity='0.15'%3E%3Cpath d='M36 34v-4h-2v4h-4v2h4v4h2v-4h4v-2h-4zm0-30V0h-2v4h-4v2h4v4h2V6h4V4h-4zM6 34v-4H4v4H0v2h4v4h2v-4h4v-2H6zM6 4V0H4v4H0v2h4v4h2V6h4V4H6z'/%3E%3C/g%3E%3C/g%3E%3C/svg%3E");
        opacity: 0.2;
        z-index: 0;
    }

    .accent-glow {
        position: absolute;
        width: 600px;
        height: 600px;
        border-radius: 50%;
        background: radial-gradient(circle, rgba(88, 166, 255, 0.1) 0%, rgba(88, 166, 255, 0) 70%);
        top: -200px;
        right: -100px;
        z-index: 1;
    }

    /* Content container - optimized for horizontal layout */
    .content {
        z-index: 2;
        position: relative;
        height: 100%;
        padding: 40px 60px;
        display: flex;
        flex-direction: column;
    }

    /* Slide structure for better horizontal organization */
    .slide-header {
        margin-bottom: 30px;
    }

    .slide-body {
        display: flex;
        flex: 1;
        gap: 40px;
        align-items: flex-start;
    }

    .left-column, .right-column {
        flex: 1;
        display: flex;
        flex-direction: column;
    }

    /* Default title styling - can be overridden */
    .main-title {
        font-size: 3.5rem;
        font-weight: 700;
        background: linear-gradient(135deg, #58a6ff 0%, #8957e5 100%);
        -webkit-background-clip: text;
        background-clip: text;
        -webkit-text-fill-color: transparent;
        line-height: 1.1;
        margin-bottom: 10px;
    }

    /* Default content styling - optimized for readability */
    .slide-content {
        font-size: 1.5rem;
        color: #e6edf3;
        line-height: 1.5;
        display: flex;
        flex-direction: column;
    }

    /* Ensure bullet points are properly spaced and aligned */
    .slide-content ul, .slide-content ol {
        margin: 0.5em 0;
        padding-left: 1.5em;
    }

    .slide-content li {
        margin-bottom: 0.5em;
    }

    /* Ensure code snippets don't overflow */
    .slide-content pre, .slide-content code {
        max-width: 100%;
        overflow-x: auto;
        white-space: pre-wrap;
        font-size: 1.2rem;
    }

    /* Additional utility classes for creative layouts */
    .flex-row { display: flex; flex-direction: row; }
    .flex-col { display: flex; flex-direction: column; }
    .items-center { align-items: center; }
    .justify-center { justify-content: center; }
    .justify-between { justify-content: space-between; }
    .text-center { text-align: center; }
    .text-right { text-align: right; }
    .w-full { width: 100%; }
    .h-full { height: 100%; }
    .relative { position: relative; }
    .absolute { position: absolute; }

    /* Accent colors for creative use */
    .text-accent-blue { color: #58a6ff; }
    .text-accent-purple { color: #8957e5; }
    .text-accent-green { color: #3fb950; }
    .text-accent-orange { color: #f0883e; }
    .bg-accent-blue { background-color: rgba(88, 166, 255, 0.2); }
    .bg-accent-purple { background-color: rgba(137, 87, 229, 0.2); }
</style>
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@fortawesome/fontawesome-free@6.4.0/css/all.min.css">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/chart.js@3.9.1/dist/chart.min.css">
<script src="https://cdn.jsdelivr.net/npm/chart.js@3.9.1/dist/chart.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/mermaid@10.0.0/dist/mermaid.min.js"></script>
<script>
  // Initialize Mermaid for diagrams if present
  document.addEventListener('DOMContentLoaded', function() {
    if (typeof mermaid !== 'undefined') {
      mermaid.initialize({
        theme: 'dark',
        securityLevel: 'loose',
        startOnLoad: true
      });
    }

    // Initialize any Chart.js charts if present
    if (typeof Chart !== 'undefined') {
      // Charts will be initialized by their own script tags
    }
  });
</script>
          `;
        }

        // Create the slide object
        const slide: Slide = {
          id: `slide-${slideCounter}`,
          title: slideTitle,
          content: slideDescription || slideTitle,
          html: slideHtml
        };

        // Add to our slides array
        generatedSlides.push(slide);

        // Update the state with the slides we have so far
        setSlides([...generatedSlides]);

        slideCounter++;
      }

      // Set the final slides
      setSlides(generatedSlides);

    } catch (err) {
      console.error('Error generating slides content:', err);
      setError(err instanceof Error ? err.message : 'An unknown error occurred');
    } finally {
      setIsLoading(false);
      setLoadingMessage(undefined);
    }
  }, [owner, repo, repoInfo, token, providerParam, modelParam, isCustomModelParam, customModelParam, language, isLoading, messages.loading, cachedWikiContent, fetchCachedWikiContent]);

  // Export slides content
  // Builds a standalone HTML presentation from the current `slides` state and
  // triggers a browser download named `<repo>_slides.html`.
  //
  // - Sets an export error and returns early when there are no slides.
  // - Toggles `isExporting` around the work; any thrown error is logged and
  //   surfaced through `setExportError`.
  const exportSlides = useCallback(async () => {
    if (!slides || slides.length === 0) {
      setExportError('No slides to export');
      return;
    }

    try {
      setIsExporting(true);
      setExportError(null);

      // Create a full HTML document with all slides.
      //
      // NOTE: the embedded navigation script shows/hides slides by writing an
      // inline `display` style on each `.slide-container`. Inline styles win
      // over normal stylesheet rules, so the print rule for `.slide-container`
      // marks `display` as `!important`; otherwise only the currently visible
      // slide would be printed instead of one slide per page.
      const htmlContent = `
<!DOCTYPE html>
<html lang="${language}">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>${repo} Slides</title>
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@fortawesome/fontawesome-free@6.4.0/css/all.min.css">
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/chart.js@3.9.1/dist/chart.min.css">
  <script src="https://cdn.jsdelivr.net/npm/chart.js@3.9.1/dist/chart.min.js"></script>
  <script src="https://cdn.jsdelivr.net/npm/mermaid@10.0.0/dist/mermaid.min.js"></script>
  <style>
    body {
      font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
      margin: 0;
      padding: 0;
      background-color: #0d1117;
      color: #e6edf3;
    }
    .slide-container {
      max-width: 1280px;
      height: 720px; /* 16:9 aspect ratio */
      margin: 2rem auto;
      page-break-after: always;
      position: relative;
      overflow: hidden;
      box-shadow: 0 10px 30px rgba(0, 0, 0, 0.5);
      border-radius: 8px;
    }

    /* Ensure proper horizontal layout in exported slides */
    .slide-body {
      display: flex;
      flex: 1;
      gap: 40px;
      align-items: flex-start;
    }

    .left-column, .right-column {
      flex: 1;
      display: flex;
      flex-direction: column;
    }

    /* Ensure content is properly sized */
    .slide-content {
      font-size: 1.5rem;
      line-height: 1.5;
    }

    /* Ensure bullet points are properly spaced */
    .slide-content ul, .slide-content ol {
      margin: 0.5em 0;
      padding-left: 1.5em;
    }

    .slide-content li {
      margin-bottom: 0.5em;
    }

    /* Ensure code snippets don't overflow */
    .slide-content pre, .slide-content code {
      max-width: 100%;
      overflow-x: auto;
      white-space: pre-wrap;
      font-size: 1.2rem;
    }
    @media print {
      .slide-container {
        page-break-after: always;
        margin: 0;
        height: 100vh;
        display: flex !important;
        flex-direction: column;
        justify-content: center;
        box-shadow: none;
        border-radius: 0;
      }
    }
    /* Navigation controls for presentation mode */
    .nav-controls {
      position: fixed;
      bottom: 20px;
      left: 50%;
      transform: translateX(-50%);
      display: flex;
      gap: 20px;
      z-index: 1000;
      background: rgba(13, 17, 23, 0.8);
      padding: 10px 20px;
      border-radius: 30px;
      box-shadow: 0 5px 15px rgba(0, 0, 0, 0.3);
    }
    .nav-btn {
      background: rgba(56, 139, 253, 0.1);
      border: 1px solid rgba(56, 139, 253, 0.4);
      color: #58a6ff;
      border-radius: 50%;
      width: 40px;
      height: 40px;
      display: flex;
      align-items: center;
      justify-content: center;
      cursor: pointer;
      font-size: 18px;
      transition: all 0.2s ease;
    }
    .nav-btn:hover {
      background: rgba(56, 139, 253, 0.2);
    }
    .slide-indicator {
      display: flex;
      align-items: center;
      color: #8b949e;
      font-size: 14px;
    }
    @media print {
      .nav-controls {
        display: none;
      }
    }
  </style>
</head>
<body>
  ${slides.map(slide => `<div class="slide-container">${slide.html}</div>`).join('\n')}

  <!-- Navigation controls (only visible in browser) -->
  <div class="nav-controls">
    <div class="nav-btn prev-slide" onclick="prevSlide()">
      <i class="fas fa-chevron-left"></i>
    </div>
    <div class="slide-indicator">
      <span id="current-slide">1</span>/<span id="total-slides">${slides.length}</span>
    </div>
    <div class="nav-btn next-slide" onclick="nextSlide()">
      <i class="fas fa-chevron-right"></i>
    </div>
  </div>

  <script>
    // Simple presentation navigation
    let currentSlide = 1;
    const totalSlides = ${slides.length};
    const slideContainers = document.querySelectorAll('.slide-container');

    // Initialize - show only first slide
    function initSlides() {
      slideContainers.forEach((slide, index) => {
        if (index === 0) {
          slide.style.display = 'block';
        } else {
          slide.style.display = 'none';
        }
      });
      updateIndicator();
    }

    function showSlide(slideNumber) {
      slideContainers.forEach((slide, index) => {
        slide.style.display = index + 1 === slideNumber ? 'block' : 'none';
      });
      updateIndicator();
    }

    function nextSlide() {
      if (currentSlide < totalSlides) {
        currentSlide++;
        showSlide(currentSlide);
      }
    }

    function prevSlide() {
      if (currentSlide > 1) {
        currentSlide--;
        showSlide(currentSlide);
      }
    }

    function updateIndicator() {
      document.getElementById('current-slide').textContent = currentSlide;
    }

    // Keyboard navigation
    document.addEventListener('keydown', (e) => {
      if (e.key === 'ArrowRight' || e.key === ' ') {
        nextSlide();
      } else if (e.key === 'ArrowLeft') {
        prevSlide();
      }
    });

    // Initialize on load
    window.onload = function() {
      initSlides();

      // Initialize Mermaid diagrams if present
      if (typeof mermaid !== 'undefined') {
        mermaid.initialize({
          theme: 'dark',
          securityLevel: 'loose',
          startOnLoad: true
        });
      }
    };
  </script>
</body>
</html>
      `;

      // Wrap the document in a Blob and download it through a temporary
      // anchor element that is attached, clicked, and then removed again.
      const blob = new Blob([htmlContent], { type: 'text/html' });
      const url = window.URL.createObjectURL(blob);
      const a = document.createElement('a');
      a.href = url;
      a.download = `${repo}_slides.html`;
      document.body.appendChild(a);
      a.click();
      // Release the object URL and detach the helper anchor
      window.URL.revokeObjectURL(url);
      document.body.removeChild(a);

    } catch (err) {
      console.error('Error exporting slides:', err);
      setExportError(err instanceof Error ? err.message : 'An unknown error occurred');
    } finally {
      // Always clear the exporting flag, whether the export succeeded or not
      setIsExporting(false);
    }
  }, [slides, repo, language]);

  // Navigation functions
  const goToNextSlide = useCallback(() => {
    // Only move forward while a later slide exists
    const canAdvance = currentSlideIndex < slides.length - 1;
    if (!canAdvance) {
      return;
    }
    setCurrentSlideIndex(index => index + 1);
  }, [currentSlideIndex, slides.length]);

  const goToPrevSlide = useCallback(() => {
    // Only move backward while an earlier slide exists
    const canGoBack = currentSlideIndex > 0;
    if (!canGoBack) {
      return;
    }
    setCurrentSlideIndex(index => index - 1);
  }, [currentSlideIndex]);

  const toggleFullscreen = useCallback(() => {
    // Flip the fullscreen flag based on its most recent value
    setIsFullscreen(wasFullscreen => {
      return !wasFullscreen;
    });
  }, []);

  // Handle keyboard navigation
  useEffect(() => {
    // Window-level key bindings for the slide viewer:
    //   ArrowRight / space bar -> next slide
    //   ArrowLeft              -> previous slide
    //   f / F                  -> toggle fullscreen
    //   Escape                 -> leave fullscreen (only while fullscreen)
    const handleKeyDown = (e: KeyboardEvent) => {
      // KeyboardEvent.key reports the space bar as ' ' ('Spacebar' in some
      // legacy browsers). 'Space' is the value of e.code, not e.key, so
      // comparing e.key against 'Space' never matches.
      const isSpaceKey = e.key === ' ' || e.key === 'Spacebar';

      if (e.key === 'ArrowRight' || isSpaceKey) {
        goToNextSlide();
      } else if (e.key === 'ArrowLeft') {
        goToPrevSlide();
      } else if (e.key === 'f' || e.key === 'F') {
        toggleFullscreen();
      } else if (e.key === 'Escape' && isFullscreen) {
        setIsFullscreen(false);
      }
    };

    window.addEventListener('keydown', handleKeyDown);
    // Unsubscribe on cleanup so a stale handler is never left registered
    return () => {
      window.removeEventListener('keydown', handleKeyDown);
    };
  }, [goToNextSlide, goToPrevSlide, toggleFullscreen, isFullscreen]);

  // Track if we've already generated content
  // Remembers whether the initial generation has already been kicked off
  const contentGeneratedRef = useRef(false);

  // On first run only: load the cached wiki content, then start slide generation
  useEffect(() => {
    if (contentGeneratedRef.current) {
      return;
    }
    contentGeneratedRef.current = true;

    // Fetch must finish before generation is started
    const loadThenGenerate = async () => {
      await fetchCachedWikiContent();
      generateSlidesContent();
    };
    loadThenGenerate();
  }, [generateSlidesContent, fetchCachedWikiContent]);

  // Page layout.
  //
  // - Header (back link, regenerate, export, fullscreen toggle, theme toggle):
  //   rendered only when not in fullscreen.
  // - Main area, first matching state wins:
  //     1. loading with no slides yet -> spinner + loadingMessage
  //     2. error                      -> error box
  //     3. at least one slide         -> current slide + prev/next controls
  //     4. otherwise                  -> "no slides" hint
  // - In fullscreen the navigation bar is pinned to the bottom and gains an
  //   exit button.
  //
  // NOTE(review): the current slide's HTML is injected verbatim through
  // dangerouslySetInnerHTML. Presumably it is model-generated; confirm it is
  // sanitized before it reaches `slides`.
  // NOTE(review): `window.location.search` is read while rendering, which
  // throws wherever `window` is undefined (e.g. a server render). Confirm this
  // component only ever renders in the browser.
  return (
    <div className={`min-h-screen flex flex-col ${isFullscreen ? 'fixed inset-0 z-50 bg-[#0d1117]' : 'bg-[var(--background)]'}`}>
      {/* Header - Hide in fullscreen mode */}
      {!isFullscreen && (
        <header className="sticky top-0 z-10 bg-[var(--card-bg)] border-b border-[var(--border-color)] shadow-sm">
          <div className="container mx-auto px-4 py-3 flex items-center justify-between">
            <div className="flex items-center space-x-4">
              <Link
                href={`/${owner}/${repo}${window.location.search}`}
                className="flex items-center text-[var(--foreground)] hover:text-[var(--accent-primary)] transition-colors"
              >
                <FaArrowLeft className="mr-2" />
                <span>{messages.slides?.backToWiki || 'Back to Wiki'}</span>
              </Link>
              <h1 className="text-xl font-bold text-[var(--accent-primary)]">
                {messages.slides?.title || 'Slides'}: {repo}
              </h1>
            </div>
            <div className="flex items-center space-x-3">
              <button
                onClick={generateSlidesContent}
                disabled={isLoading}
                className={`p-2 rounded-md ${isLoading ? 'bg-[var(--button-disabled-bg)] text-[var(--button-disabled-text)]' : 'bg-[var(--accent-primary)]/10 text-[var(--accent-primary)] hover:bg-[var(--accent-primary)]/20'} transition-colors`}
                title={messages.slides?.regenerate || 'Regenerate Slides'}
              >
                <FaSync className={`${isLoading ? 'animate-spin' : ''}`} />
              </button>
              <button
                onClick={exportSlides}
                disabled={!slides.length || isExporting}
                className={`p-2 rounded-md ${!slides.length || isExporting ? 'bg-[var(--button-disabled-bg)] text-[var(--button-disabled-text)]' : 'bg-[var(--accent-primary)]/10 text-[var(--accent-primary)] hover:bg-[var(--accent-primary)]/20'} transition-colors`}
                title={messages.slides?.export || 'Export Slides'}
              >
                <FaDownload />
              </button>
              <button
                onClick={toggleFullscreen}
                className="p-2 rounded-md bg-[var(--accent-primary)]/10 text-[var(--accent-primary)] hover:bg-[var(--accent-primary)]/20 transition-colors"
                title={messages.slides?.fullscreen || 'Toggle Fullscreen'}
              >
                <FaArrowUp />
              </button>
              <ThemeToggle />
            </div>
          </div>
        </header>
      )}

      {/* Main content */}
      <main className={`flex-1 flex flex-col ${isFullscreen ? 'p-0' : 'container mx-auto px-4 py-6'}`}>
        {isLoading && !slides.length ? (
          <div className="flex flex-col items-center justify-center p-8 flex-grow">
            <div className="w-12 h-12 border-4 border-[var(--accent-primary)]/30 border-t-[var(--accent-primary)] rounded-full animate-spin mb-4"></div>
            <p className="text-[var(--foreground)]">{loadingMessage}</p>
          </div>
        ) : error ? (
          <div className="bg-red-50 dark:bg-red-900/20 border border-red-200 dark:border-red-800 rounded-md p-4 mb-6">
            <h3 className="text-red-800 dark:text-red-400 font-medium mb-2">{messages.common?.error || 'Error'}</h3>
            <p className="text-red-700 dark:text-red-300">{error}</p>
          </div>
        ) : slides.length > 0 ? (
          <div className="flex flex-col flex-grow">
            {/* Slide content */}
            <div className={`flex-grow flex flex-col items-center justify-center ${isFullscreen ? 'p-0 bg-[#0d1117]' : 'bg-[var(--card-bg)] border border-[var(--border-color)] rounded-lg shadow-sm p-6 mb-4'}`}>
              {exportError && (
                <div className="bg-red-50 dark:bg-red-900/20 border border-red-200 dark:border-red-800 rounded-md p-3 mb-4 w-full">
                  <p className="text-red-700 dark:text-red-300 text-sm">{exportError}</p>
                </div>
              )}

              {/* Current slide */}
              <div
                className={`${isFullscreen ? 'w-full h-full' : 'w-full max-w-[1280px] aspect-[16/9]'} flex items-center justify-center overflow-hidden`}
              >
                {/* Include Font Awesome for icons */}
                <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@fortawesome/fontawesome-free@6.4.0/css/all.min.css" />
                <div className="w-full h-full" dangerouslySetInnerHTML={{ __html: slides[currentSlideIndex]?.html || '' }} />
              </div>
            </div>

            {/* Navigation controls */}
            <div className={`flex items-center justify-between ${isFullscreen ? 'fixed bottom-6 left-1/2 transform -translate-x-1/2 bg-[#0d1117]/80 px-6 py-3 rounded-full z-10 shadow-lg' : 'mt-4'}`}>
              <button
                onClick={goToPrevSlide}
                disabled={currentSlideIndex === 0}
                className={`p-2 rounded-md ${currentSlideIndex === 0 ? 'bg-[var(--button-disabled-bg)] text-[var(--button-disabled-text)]' : 'bg-[var(--accent-primary)]/10 text-[var(--accent-primary)] hover:bg-[var(--accent-primary)]/20'} transition-colors`}
              >
                <FaArrowLeft />
              </button>

              <div className={`text-[var(--foreground)] ${isFullscreen ? 'mx-4' : ''}`}>
                Slide {currentSlideIndex + 1} of {slides.length}
              </div>

              <button
                onClick={goToNextSlide}
                disabled={currentSlideIndex === slides.length - 1}
                className={`p-2 rounded-md ${currentSlideIndex === slides.length - 1 ? 'bg-[var(--button-disabled-bg)] text-[var(--button-disabled-text)]' : 'bg-[var(--accent-primary)]/10 text-[var(--accent-primary)] hover:bg-[var(--accent-primary)]/20'} transition-colors`}
              >
                <FaArrowRight />
              </button>

              {isFullscreen && (
                <button
                  onClick={toggleFullscreen}
                  className="p-2 ml-4 rounded-md bg-[var(--accent-primary)]/10 text-[var(--accent-primary)] hover:bg-[var(--accent-primary)]/20 transition-colors"
                  title={messages.slides?.fullscreen || 'Exit Fullscreen'}
                >
                  <FaTimes />
                </button>
              )}
            </div>
          </div>
        ) : (
          <div className="flex flex-col items-center justify-center p-8 flex-grow">
            <p className="text-[var(--foreground)]">{messages.slides?.noSlides || 'No slides generated yet. Click the refresh button to generate slides.'}</p>
          </div>
        )}
      </main>
    </div>
  );
}


================================================
FILE: src/app/[owner]/[repo]/workshop/page.tsx
================================================
'use client';

import React, { useCallback, useState, useEffect, useRef, useMemo } from 'react';
import { useParams, useSearchParams } from 'next/navigation';
import Link from 'next/link';
import { FaArrowLeft, FaSync, FaDownload } from 'react-icons/fa';
import ThemeToggle from '@/components/theme-toggle';
import Markdown from '@/components/Markdown';
import { useLanguage } from '@/contexts/LanguageContext';
import { RepoInfo } from '@/types/repoinfo';
import getRepoUrl from '@/utils/getRepoUrl';

// Copies the access token and the provider/model/language selection onto an
// outgoing request body. The object is mutated in place; nothing is returned.
//
// - `token` is written only when it is a non-empty string.
// - `custom_model` is written only when `isCustomModel` is set AND
//   `customModel` is non-empty.
// - `provider`, `model` and `language` are always written.
// - `repoType` is accepted but not read by this helper.
const addTokensToRequestBody = (
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  requestBody: Record<string, any>,
  token: string,
  repoType: string,
  provider: string = '',
  model: string = '',
  isCustomModel: boolean = false,
  customModel: string = '',
  language: string = 'en',
) => {
  const tokenField = token !== '' ? { token } : {};
  const customModelField = isCustomModel && customModel ? { custom_model: customModel } : {};

  // Fragments are applied left to right, so new keys land on the body in the
  // order: token, provider, model, custom_model, language.
  Object.assign(
    requestBody,
    tokenField,
    { provider, model },
    customModelField,
    { language },
  );
};

export default function WorkshopPage() {
  // Route segments and query string of the current URL.
  const params = useParams();
  const searchParams = useSearchParams();

  // Dynamic route segments.
  const owner = params.owner as string;
  const repo = params.repo as string;

  // Reads a query parameter; a missing or empty value yields `fallback`.
  const queryValue = (key: string, fallback: string = ''): string =>
    searchParams.get(key) || fallback;

  // Access token and repository location.
  const token = queryValue('token');
  const repoType = queryValue('type', 'github');
  // local_path / repo_url are passed through decodeURIComponent when present
  // and left undefined otherwise.
  const rawLocalPath = searchParams.get('local_path');
  const localPath = rawLocalPath ? decodeURIComponent(rawLocalPath) : undefined;
  const rawRepoUrl = searchParams.get('repo_url');
  const repoUrl = rawRepoUrl ? decodeURIComponent(rawRepoUrl) : undefined;

  // Model selection and output language.
  const providerParam = queryValue('provider');
  const modelParam = queryValue('model');
  const isCustomModelParam = searchParams.get('is_custom_model') === 'true';
  const customModelParam = queryValue('custom_model');
  const language = queryValue('language', 'en');

  // Translated UI strings.
  const { messages } = useLanguage();

  // Memoized so the object identity only changes when one of its fields does.
  const repoInfo = useMemo<RepoInfo>(
    () => ({
      owner,
      repo,
      type: repoType,
      token: token || null,
      localPath: localPath || null,
      repoUrl: repoUrl || null,
    }),
    [owner, repo, repoType, token, localPath, repoUrl],
  );

  // Component state.
  // isLoading: true while a generation run is in progress.
  const [isLoading, setIsLoading] = useState(false);
  // loadingMessage: text shown next to the spinner; reset to undefined when a
  // generation run finishes.
  const [loadingMessage, setLoadingMessage] = useState<string | undefined>(
    messages.loading?.initializing || 'Initializing workshop generation...'
  );
  // error: message of the last failed generation run, or null.
  const [error, setError] = useState<string | null>(null);
  // workshopContent: the workshop markdown; updated incrementally while the
  // response streams in, then replaced with the post-processed text.
  const [workshopContent, setWorkshopContent] = useState<string>('');
  // isExporting / exportError: status of the markdown download.
  const [isExporting, setIsExporting] = useState(false);
  const [exportError, setExportError] = useState<string | null>(null);

  // Shapes of the cached wiki payload read from /api/wiki_cache.
  // A single wiki page. `importance` is compared against 'high' when the
  // prompt context is assembled.
  interface WikiPage {
    id: string;
    title: string;
    content: string;
    importance: string;
    filePaths: string[];
    relatedPages: string[];
  }

  // A wiki section; `pages` and `subsections` are string arrays
  // (presumably ids of pages and of other sections; verify against the API).
  interface WikiSection {
    id: string;
    title: string;
    pages: string[];
    subsections: string[];
  }

  // Overall wiki layout: a description plus its pages and sections.
  interface WikiStructure {
    description: string;
    pages: WikiPage[];
    sections: WikiSection[];
    rootSections: string[];
  }

  // Cache payload: the structure plus generated pages, which are looked up
  // by page id.
  interface WikiCacheData {
    wiki_structure: WikiStructure;
    generated_pages: Record<string, WikiPage>;
  }

  // Last wiki payload fetched from the cache; null until a fetch succeeds.
  const [cachedWikiContent, setCachedWikiContent] = useState<WikiCacheData | null>(null);

  // Loads the cached wiki for this repository/language from /api/wiki_cache.
  //
  // Resolves to the parsed payload (also stored in `cachedWikiContent`) when
  // it has a wiki structure and at least one generated page. Resolves to null
  // on a non-OK response, an empty/invalid payload, or any thrown error;
  // this function never rejects.
  const fetchCachedWikiContent = useCallback(async () => {
    try {
      const query = new URLSearchParams({
        owner: repoInfo.owner,
        repo: repoInfo.repo,
        repo_type: repoInfo.type,
        language: language,
      }).toString();
      const res = await fetch(`/api/wiki_cache?${query}`);

      if (!res.ok) {
        console.error('Error fetching wiki cache from server:', res.status);
        return null;
      }

      const payload = await res.json();
      const hasGeneratedPages =
        payload &&
        payload.wiki_structure &&
        payload.generated_pages &&
        Object.keys(payload.generated_pages).length > 0;

      if (!hasGeneratedPages) {
        console.log('No valid wiki data in server cache or cache is empty.');
        return null;
      }

      console.log('Successfully fetched cached wiki data for workshop generation');
      setCachedWikiContent(payload);
      return payload;
    } catch (error) {
      console.error('Error loading from server cache:', error);
      return null;
    }
  }, [repoInfo.owner, repoInfo.repo, repoInfo.type, language]);

  // Generates the workshop markdown and stores it in `workshopContent`.
  //
  // Steps:
  //   1. Build a context digest from the cached wiki (fetched here if it is
  //      not in state yet): high-importance pages first, then the rest, with
  //      a soft cap of about 30,000 characters.
  //   2. Send the prompt to the backend over a WebSocket (`/ws/chat`) and
  //      stream the reply into state. If the socket cannot be opened within
  //      5 seconds, or errors, fall back to POST /api/chat/stream.
  //   3. Post-process the finished markdown: strip a wrapping ```markdown
  //      fence, insert a table of contents if none exists, and add
  //      progress/time notes under exercise and final-project headings.
  //
  // Does nothing while a run is already in progress. Failures are reported
  // through `error`; this function does not reject.
  const generateWorkshopContent = useCallback(async () => {
    if (isLoading) return;

    setIsLoading(true);
    setError(null);
    // Clear previous content
    setWorkshopContent('');
    setLoadingMessage(messages.loading?.generatingWorkshop || 'Generating workshop content...');

    try {
      // Get repository URL
      const repoUrl = getRepoUrl(repoInfo);

      // Fetch cached wiki content if not already available
      let wikiData = cachedWikiContent;
      if (!wikiData) {
        wikiData = await fetchCachedWikiContent();
      }

      // We'll just pass the entire wiki data to the LLM without complex processing
      let wikiContent = '';

      if (wikiData && wikiData.wiki_structure && wikiData.generated_pages) {
        // Add the wiki structure description
        wikiContent += `## Project Overview\n${wikiData.wiki_structure.description || ''}\n\n`;

        // Add all wiki pages content
        const pages = wikiData.wiki_structure.pages || [];
        const generatedPages = wikiData.generated_pages || {};

        // Limit the total content to avoid token limits
        let totalContentLength = 0;
        const maxContentLength = 30000; // Approximate limit to avoid token issues

        // First add high importance pages
        const highImportancePages = pages.filter(page => page.importance === 'high');
        for (const page of highImportancePages) {
          if (generatedPages[page.id] && generatedPages[page.id].content) {
            const content = `## ${page.title}\n${generatedPages[page.id].content}\n\n`;
            wikiContent += content;
            totalContentLength += content.length;

            // The cap is checked after appending, so the last page added may
            // push the total past it.
            if (totalContentLength > maxContentLength) break;
          }
        }

        // Then add other pages if we still have space
        if (totalContentLength < maxContentLength) {
          for (const page of pages) {
            // Skip high importance pages we've already added
            if (page.importance === 'high') continue;

            if (generatedPages[page.id] && generatedPages[page.id].content) {
              const content = `## ${page.title}\n${generatedPages[page.id].content}\n\n`;

              // Check if adding this content would exceed our limit
              if (totalContentLength + content.length > maxContentLength) {
                // If it would exceed, just add a summary: the first paragraph
                // that follows a "# " heading line.
                const summaryMatch = generatedPages[page.id].content.match(/# .*?\n\n(.*?)(\n\n|$)/);
                const summary = summaryMatch ? summaryMatch[1].trim() : 'No summary available';
                const summaryContent = `## ${page.title}\n${summary}\n\n`;

                wikiContent += summaryContent;
                totalContentLength += summaryContent.length;
              } else {
                // Otherwise add the full content
                wikiContent += content;
                totalContentLength += content.length;
              }

              if (totalContentLength > maxContentLength) break;
            }
          }
        }
      }

      // Prepare request body with enhanced context from wiki
      const requestBody: Record<string, unknown> = {
        repo_url: repoUrl,
        type: repoInfo.type,
        messages: [{
          role: 'user',
          content: `Create a comprehensive workshop for learning how to use and contribute to the ${owner}/${repo} repository.

I'll provide you with information from the project's wiki to help you create a more accurate and relevant workshop.

${wikiContent}

This workshop should be designed as a hands-on tutorial that guides users through understanding, using, and potentially contributing to this project. The workshop should be highly readable and optimized for quick onboarding of new users.

The workshop should include:

1. A series of progressive exercises that build on each other (at least 3-4 exercises)
2. Clear instructions for each exercise with step-by-step guidance
3. Code examples and snippets where appropriate
4. "Challenge" sections that encourage deeper exploration
5. Solutions for each exercise and challenge (in collapsible sections using <details> tags)
6. Explanations that connect the exercises to the actual codebase

Format the workshop in Markdown with the following structure:

# ${repo} Workshop

## Introduction
- Brief overview of the project
- What users will learn in this workshop
- Prerequisites and setup instructions

## Exercise 1: [First Core Concept]
- Explanation of the concept
- Step-by-step instructions with clear formatting
- Expected outcome
- Challenge (optional harder task)
- Solution (in a collapsible section using <details> tags)

## Exercise 2: [Second Core Concept]
...

## Exercise 3: [Third Core Concept]
...

## Final Project
- A culminating exercise that brings together multiple concepts
- Clear success criteria
- Solution

## Next Steps
- Suggestions for further learning
- How to contribute to the project
- Additional resources

IMPORTANT FORMATTING GUIDELINES:
1. Use clear headings and subheadings with proper hierarchy
2. Use bullet points and numbered lists for clarity
3. Highlight important information in **bold** or with blockquotes
4. Use code blocks with proper syntax highlighting
5. Include Mermaid diagrams where they would help illustrate concepts or workflows
6. Put solutions in collapsible <details> sections
7. Use tables for comparing options or summarizing information
8. Break long sections into smaller, digestible chunks
9. Use consistent formatting throughout

IMPORTANT CONTENT GUIDELINES:
1. Make sure each exercise focuses on a REAL aspect of the ${repo} repository
2. Use REAL code examples from the repository, not generic examples
3. Create exercises that are practical and relevant to the actual codebase
4. Include at least 3-4 exercises covering different aspects of the repository
5. The final project should be challenging but achievable
6. Ensure the workshop is specific to this repository, not generic
7. Focus on the most important/core features of the repository
8. Include diagrams to visualize complex concepts
9. Make sure the workshop is engaging and interactive

Make the workshop content in ${language === 'en' ? 'English' :
  language === 'ja' ? 'Japanese (日本語)' :
  language === 'zh' ? 'Mandarin Chinese (中文)' :
  language === 'zh-tw' ? 'Traditional Chinese (繁體中文)' :
  language === 'es' ? 'Spanish (Español)' :
  language === 'kr' ? 'Korean (한국어)' :
  language === 'vi' ? 'Vietnamese (Tiếng Việt)' :
  language === "pt-br" ? "Brazilian Portuguese (Português Brasileiro)" :
  language === "fr" ? "Français (French)" :
  language === "ru" ? "Русский (Russian)" :
  'English'} language.`
        }]
      };

      // Add tokens if available
      addTokensToRequestBody(requestBody, token, repoInfo.type, providerParam, modelParam, isCustomModelParam, customModelParam, language);

      // Use WebSocket for communication
      let content = '';

      try {
        // Create WebSocket URL from the server base URL
        // NOTE(review): this code runs in the browser; confirm SERVER_BASE_URL
        // is actually exposed to the client bundle, otherwise the default
        // below is always used.
        const serverBaseUrl = process.env.SERVER_BASE_URL || 'http://localhost:8001';
        // Swapping the leading "http" for "ws" covers both schemes:
        // http://host -> ws://host and https://host -> wss://host.
        const wsBaseUrl = serverBaseUrl.replace(/^http/, 'ws');
        const wsUrl = `${wsBaseUrl}/ws/chat`;

        // Create a new WebSocket connection
        const ws = new WebSocket(wsUrl);

        // Wait for the connection to open (and the request to be sent)
        await new Promise<void>((resolve, reject) => {
          // If the connection doesn't open within 5 seconds, fall back to HTTP
          const timeout = setTimeout(() => {
            // Abandon the socket: without this, a connection that opens late
            // would still send the request while the HTTP fallback is running.
            ws.onopen = null;
            ws.onerror = null;
            ws.close();
            reject(new Error('WebSocket connection timeout'));
          }, 5000);

          ws.onopen = () => {
            clearTimeout(timeout);
            console.log('WebSocket connection established for workshop generation');
            // Send the request as JSON
            ws.send(JSON.stringify(requestBody));
            resolve();
          };

          ws.onerror = (error) => {
            clearTimeout(timeout);
            console.error('WebSocket error:', error);
            reject(new Error('WebSocket connection failed'));
          };
        });

        // Create a promise that resolves when the WebSocket response is complete
        await new Promise<void>((resolve, reject) => {
          // Use a local variable to accumulate content
          let accumulatedContent = '';

          // Handle incoming messages
          ws.onmessage = (event) => {
            const chunk = event.data;
            content += chunk;
            accumulatedContent += chunk;

            // Update the state with the accumulated content
            setWorkshopContent(accumulatedContent);
          };

          // Handle WebSocket close: the close event marks the end of the response
          ws.onclose = () => {
            console.log('WebSocket connection closed for workshop generation');
            resolve();
          };

          // Handle WebSocket errors
          ws.onerror = (error) => {
            console.error('WebSocket error during message reception:', error);
            reject(new Error('WebSocket error during message reception'));
          };
        });
      } catch (wsError) {
        console.error('WebSocket error, falling back to HTTP:', wsError);

        // Fall back to HTTP if WebSocket fails
        const response = await fetch(`/api/chat/stream`, {
          method: 'POST',
          headers: {
            'Content-Type': 'application/json',
          },
          body: JSON.stringify(requestBody)
        });

        if (!response.ok) {
          const errorText = await response.text().catch(() => 'No error details available');
          throw new Error(`Error generating workshop content: ${response.status} - ${errorText}`);
        }

        // Process the response; anything received over the socket is discarded
        content = '';
        const reader = response.body?.getReader();
        const decoder = new TextDecoder();

        if (!reader) {
          throw new Error('Failed to get response reader');
        }

        try {
          // Use a local variable to accumulate content
          let accumulatedContent = '';

          while (true) {
            const { done, value } = await reader.read();
            if (done) break;
            const chunk = decoder.decode(value, { stream: true });
            content += chunk;
            accumulatedContent += chunk;

            // Update the state with the accumulated content
            setWorkshopContent(accumulatedContent);
          }
          // Ensure final decoding
          const finalChunk = decoder.decode();
          content += finalChunk;
          accumulatedContent += finalChunk;
          setWorkshopContent(accumulatedContent);
        } catch (readError) {
          console.error('Error reading stream:', readError);
          throw new Error('Error processing response stream');
        }
      }

      // Clean up markdown delimiters
      content = content.replace(/^```markdown\s*/i, '').replace(/```\s*$/i, '');

      // Add a table of contents if it doesn't already have one
      if (!content.includes('## Table of Contents') && !content.includes('## Contents')) {
        const headings = content.match(/^## (.*)$/gm) || [];
        if (headings.length > 0) {
          let toc = '## Table of Contents\n\n';
          headings.forEach(heading => {
            const headingText = heading.replace('## ', '');
            // Create a link-friendly version of the heading
            const headingLink = headingText
              .toLowerCase()
              .replace(/[^\w\s-]/g, '')
              .replace(/\s+/g, '-');
            toc += `- [${headingText}](#${headingLink})\n`;
          });
          toc += '\n';

          // Find the position after the introduction heading
          const introPos = content.indexOf('# ') + 1;
          const nextHeadingPos = content.indexOf('## ', introPos);

          if (nextHeadingPos > introPos) {
            // Insert the TOC after the introduction
            content = content.slice(0, nextHeadingPos) + toc + content.slice(nextHeadingPos);
          }
        }
      }

      // Add progress indicators to exercises
      const exerciseHeadings = content.match(/^## Exercise \d+:/gm) || [];
      if (exerciseHeadings.length > 0) {
        const totalExercises = exerciseHeadings.length;

        // Replace each exercise heading with a heading that includes a progress indicator
        for (let i = 0; i < totalExercises; i++) {
          const exerciseHeading = exerciseHeadings[i];

          // Estimate time to complete based on exercise number (earlier exercises are usually simpler)
          let estimatedTime = 10; // default 10 minutes
          if (i === 0) estimatedTime = 5; // first exercise is usually simpler
          else if (i === totalExercises - 1) estimatedTime = 15; // last exercise is usually more complex
          else if (i > Math.floor(totalExercises / 2)) estimatedTime = 12; // later exercises are more complex

          const progressIndicator = `<div style="text-align: right; font-size: 0.85em; color: #666;">
Exercise ${i + 1} of ${totalExercises} | Estimated time: ${estimatedTime} minutes
</div>\n\n`;

          // Find the position of the exercise heading
          const headingPos = content.indexOf(exerciseHeading);
          if (headingPos !== -1) {
            // Find the end of the line
            const lineEndPos = content.indexOf('\n', headingPos);
            if (lineEndPos !== -1) {
              // Insert the progress indicator after the heading
              content = content.slice(0, lineEndPos + 1) + progressIndicator + content.slice(lineEndPos + 1);
            }
          }
        }
      }

      // Add a note about the final project
      const finalProjectHeading = content.match(/^## Final Project/m);
      if (finalProjectHeading) {
        const headingPos = content.indexOf(finalProjectHeading[0]);
        if (headingPos !== -1) {
          const lineEndPos = content.indexOf('\n', headingPos);
          if (lineEndPos !== -1) {
            const finalProjectNote = `<div style="text-align: right; font-size: 0.85em; color: #666;">
Estimated time: 20-30 minutes | Combines concepts from all exercises
</div>\n\n`;
            content = content.slice(0, lineEndPos + 1) + finalProjectNote + content.slice(lineEndPos + 1);
          }
        }
      }

      setWorkshopContent(content);

    } catch (err) {
      console.error('Error generating workshop content:', err);
      setError(err instanceof Error ? err.message : 'An unknown error occurred');
    } finally {
      setIsLoading(false);
      setLoadingMessage(undefined);
    }
  }, [owner, repo, repoInfo, token, providerParam, modelParam, isCustomModelParam, customModelParam, language, isLoading, messages.loading, cachedWikiContent, fetchCachedWikiContent]);

  // Downloads the current workshop markdown as `<repo>_workshop.md`.
  // With no content it only records an export error. Any failure while
  // building the download is reported through `exportError`.
  const exportWorkshop = useCallback(async () => {
    if (!workshopContent) {
      setExportError('No workshop content to export');
      return;
    }

    try {
      setIsExporting(true);
      setExportError(null);

      // Wrap the markdown in a Blob and expose it through a temporary URL.
      const objectUrl = window.URL.createObjectURL(
        new Blob([workshopContent], { type: 'text/markdown' })
      );

      // A temporary anchor, attached to the document for the duration of the
      // click, starts the download.
      const link = document.createElement('a');
      link.href = objectUrl;
      link.download = `${repo}_workshop.md`;
      document.body.appendChild(link);
      link.click();
      window.URL.revokeObjectURL(objectUrl);
      document.body.removeChild(link);

    } catch (err) {
      console.error('Error exporting workshop:', err);
      if (err instanceof Error) {
        setExportError(err.message);
      } else {
        setExportError('An unknown error occurred');
      }
    } finally {
      setIsExporting(false);
    }
  }, [workshopContent, repo]);

  // One-shot latch so the initial generation is started only once, even
  // though the effect below re-runs whenever generateWorkshopContent changes
  // identity.
  const contentGeneratedRef = useRef(false);

  // Generate workshop content on page load, but only once
  useEffect(() => {
    if (contentGeneratedRef.current) return;
    contentGeneratedRef.current = true;

    // generateWorkshopContent loads the cached wiki itself when none is in
    // state. Pre-fetching it here first did not help: the callback captured
    // by this effect still sees the initial (null) cachedWikiContent, so it
    // requested /api/wiki_cache a second time. Calling it directly results
    // in a single request.
    void generateWorkshopContent();
  }, [generateWorkshopContent]);

  // "Back to Wiki" target: the wiki page for this repository with the current
  // query string carried over. It is built from `searchParams` rather than
  // `window.location.search` because `window` does not exist while the
  // component is rendered on the server.
  const currentQuery = searchParams.toString();
  const backToWikiHref = `/${owner}/${repo}${currentQuery ? `?${currentQuery}` : ''}`;

  // Page layout: sticky header (back link, regenerate, export, theme toggle)
  // above the main area, which shows the first matching state:
  //   1. loading with no content yet -> spinner + loadingMessage
  //   2. error                       -> error box
  //   3. otherwise                   -> rendered workshop markdown, with any
  //                                     export error shown above it
  return (
    <div className="min-h-screen flex flex-col bg-[var(--background)]">
      {/* Header */}
      <header className="sticky top-0 z-10 bg-[var(--card-bg)] border-b border-[var(--border-color)] shadow-sm">
        <div className="container mx-auto px-4 py-3 flex items-center justify-between">
          <div className="flex items-center space-x-4">
            <Link
              href={backToWikiHref}
              className="flex items-center text-[var(--foreground)] hover:text-[var(--accent-primary)] transition-colors"
            >
              <FaArrowLeft className="mr-2" />
              <span>{messages.workshop?.backToWiki || 'Back to Wiki'}</span>
            </Link>
            <h1 className="text-xl font-bold text-[var(--accent-primary)]">
              {messages.workshop?.title || 'Workshop'}: {repo}
            </h1>
          </div>
          <div className="flex items-center space-x-3">
            <button
              onClick={generateWorkshopContent}
              disabled={isLoading}
              className={`p-2 rounded-md ${isLoading ? 'bg-[var(--button-disabled-bg)] text-[var(--button-disabled-text)]' : 'bg-[var(--accent-primary)]/10 text-[var(--accent-primary)] hover:bg-[var(--accent-primary)]/20'} transition-colors`}
              title={messages.workshop?.regenerate || 'Regenerate Workshop'}
            >
              <FaSync className={`${isLoading ? 'animate-spin' : ''}`} />
            </button>
            <button
              onClick={exportWorkshop}
              disabled={!workshopContent || isExporting}
              className={`p-2 rounded-md ${!workshopContent || isExporting ? 'bg-[var(--button-disabled-bg)] text-[var(--button-disabled-text)]' : 'bg-[var(--accent-primary)]/10 text-[var(--accent-primary)] hover:bg-[var(--accent-primary)]/20'} transition-colors`}
              title={messages.workshop?.export || 'Export Workshop'}
            >
              <FaDownload />
            </button>
            <ThemeToggle />
          </div>
        </div>
      </header>

      {/* Main content */}
      <main className="flex-1 container mx-auto px-4 py-6">
        {isLoading && !workshopContent ? (
          <div className="flex flex-col items-center justify-center p-8">
            <div className="w-12 h-12 border-4 border-[var(--accent-primary)]/30 border-t-[var(--accent-primary)] rounded-full animate-spin mb-4"></div>
            <p className="text-[var(--foreground)]">{loadingMessage}</p>
          </div>
        ) : error ? (
          <div className="bg-red-50 dark:bg-red-900/20 border border-red-200 dark:border-red-800 rounded-md p-4 mb-6">
            <h3 className="text-red-800 dark:text-red-400 font-medium mb-2">{messages.common?.error || 'Error'}</h3>
            <p className="text-red-700 dark:text-red-300">{error}</p>
          </div>
        ) : (
          <div className="bg-[var(--card-bg)] border border-[var(--border-color)] rounded-lg shadow-sm p-6">
            {exportError && (
              <div className="bg-red-50 dark:bg-red-900/20 border border-red-200 dark:border-red-800 rounded-md p-3 mb-4">
                <p className="text-red-700 dark:text-red-300 text-sm">{exportError}</p>
              </div>
            )}
            <Markdown content={workshopContent} />
          </div>
        )}
      </main>
    </div>
  );
}


================================================
FILE: src/app/api/auth/status/route.ts
================================================
import { NextResponse } from "next/server";

// Base URL of the backend API this route forwards to: SERVER_BASE_URL from the
// environment, or http://localhost:8001 when it is unset or empty.
const TARGET_SERVER_BASE_URL = process.env.SERVER_BASE_URL || 'http://localhost:8001';

/**
 * Proxies GET /auth/status to the backend API.
 *
 * - Backend replied OK: its JSON body is returned as-is.
 * - Backend replied with a non-OK status: that status is passed through with
 *   an `{ error }` body.
 * - The request or JSON parsing threw: responds 500 `Internal Server Error`.
 */
export async function GET() {
  const statusUrl = `${TARGET_SERVER_BASE_URL}/auth/status`;

  try {
    const backendResponse = await fetch(statusUrl, {
      method: 'GET',
      headers: {
        'Content-Type': 'application/json',
      },
    });

    if (backendResponse.ok) {
      const payload = await backendResponse.json();
      return NextResponse.json(payload);
    }

    // Mirror the backend's status code to the caller.
    return NextResponse.json(
      { error: `Backend server returned ${backendResponse.status}` },
      { status: backendResponse.status }
    );
  } catch (error) {
    console.error('Error forwarding request to backend:', error);
    return NextResponse.json(
      { error: 'Internal Server Error' },
      { status: 500 }
    );
  }
}


================================================
FILE: src/app/api/auth/validate/route.ts
================================================
import { NextRequest, NextResponse } from "next/server";

const TARGET_SERVER_BASE_URL = process.env.SERVER_BASE_URL || 'http://localhost:8001';

/**
 * Proxies an authorization-code validation request to the backend API.
 *
 * The incoming JSON body is re-serialized and forwarded as-is. The backend's
 * JSON payload is relayed on success; a non-2xx backend status is mirrored
 * with an error message; any thrown error results in a 500.
 */
export async function POST(request: NextRequest) {
  try {
    const payload = await request.json();

    const backendResponse = await fetch(`${TARGET_SERVER_BASE_URL}/auth/validate`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload),
    });

    // Happy path: relay the backend payload unchanged.
    if (backendResponse.ok) {
      const result = await backendResponse.json();
      return NextResponse.json(result);
    }

    // Backend answered, but with an error status: mirror that status.
    return NextResponse.json(
      { error: `Backend server returned ${backendResponse.status}` },
      { status: backendResponse.status }
    );
  } catch (err) {
    // Covers an unparsable request body, network failures and bad backend JSON.
    console.error('Error forwarding request to backend:', err);
    return NextResponse.json({ error: 'Internal Server Error' }, { status: 500 });
  }
}


================================================
FILE: src/app/api/chat/stream/route.ts
================================================
import { NextRequest, NextResponse } from 'next/server';

// The target backend server base URL, derived from environment variable or defaulted.
// This should match the logic in your frontend's page.tsx for consistency.
const TARGET_SERVER_BASE_URL = process.env.SERVER_BASE_URL || 'http://localhost:8001';

// This is a fallback HTTP implementation that will be used if WebSockets are not available
// or if there's an error with the WebSocket connection
/**
 * Proxies a chat-completion request to the backend and streams the answer back.
 *
 * - A non-2xx backend response is forwarded with its status, body and headers
 *   (minus headers that describe the wire framing of the original body).
 * - A successful backend response is piped chunk by chunk to the client.
 * - Any error thrown while setting up the proxy results in a 500 JSON response.
 */
export async function POST(req: NextRequest) {
  try {
    const requestBody = await req.json(); // Assuming the frontend sends JSON

    // Note: This endpoint now uses the HTTP fallback instead of WebSockets
    // The WebSocket implementation is in src/utils/websocketClient.ts
    // This HTTP endpoint is kept for backward compatibility
    console.log('Using HTTP fallback for chat completion instead of WebSockets');

    const targetUrl = `${TARGET_SERVER_BASE_URL}/chat/completions/stream`;

    // Make the actual request to the backend service
    const backendResponse = await fetch(targetUrl, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Accept': 'text/event-stream', // Indicate that we expect a stream
      },
      body: JSON.stringify(requestBody),
    });

    // If the backend service returned an error, forward that error to the client
    if (!backendResponse.ok) {
      const errorBody = await backendResponse.text();
      // text() yields the already-decoded body, so headers that describe the
      // original wire framing must not be forwarded with it.
      const framingHeaders = new Set([
        'content-encoding',
        'content-length',
        'transfer-encoding',
        'connection',
        'keep-alive',
      ]);
      const errorHeaders = new Headers();
      backendResponse.headers.forEach((value, key) => {
        if (!framingHeaders.has(key.toLowerCase())) {
          errorHeaders.set(key, value);
        }
      });
      return new NextResponse(errorBody, {
        status: backendResponse.status,
        statusText: backendResponse.statusText,
        headers: errorHeaders,
      });
    }

    // Ensure the backend response has a body to stream
    if (!backendResponse.body) {
      return new NextResponse('Stream body from backend is null', { status: 500 });
    }

    const reader = backendResponse.body.getReader();
    // Set once the client goes away so the pump stops touching the controller.
    let cancelled = false;

    // Create a new ReadableStream to pipe the data from the backend to the client
    const stream = new ReadableStream({
      async start(controller) {
        try {
          while (!cancelled) {
            const { done, value } = await reader.read();
            if (done || cancelled) {
              break;
            }
            controller.enqueue(value);
          }
          // Close only on a clean finish: closing an errored or cancelled
          // stream throws.
          if (!cancelled) {
            controller.close();
          }
        } catch (error) {
          if (!cancelled) {
            console.error('Error reading from backend stream in proxy:', error);
            controller.error(error);
          }
        } finally {
          reader.releaseLock(); // Important to release the lock on the reader
        }
      },
      async cancel(reason) {
        console.log('Client cancelled stream request:', reason);
        cancelled = true;
        // Stop the upstream request too; ignore failures if the reader was
        // already released by the pump.
        await reader.cancel(reason).catch(() => {});
      }
    });

    // Set up headers for the response to the client
    const responseHeaders = new Headers();
    // Copy the Content-Type from the backend response (e.g., 'text/event-stream')
    const contentType = backendResponse.headers.get('Content-Type');
    if (contentType) {
      responseHeaders.set('Content-Type', contentType);
    }
    // It's good practice for streams not to be cached or transformed by intermediaries.
    responseHeaders.set('Cache-Control', 'no-cache, no-transform');

    return new NextResponse(stream, {
      status: backendResponse.status, // Should be 200 for a successful stream start
      headers: responseHeaders,
    });

  } catch (error) {
    console.error('Error in API proxy route (/api/chat/stream):', error);
    let errorMessage = 'Internal Server Error in proxy';
    if (error instanceof Error) {
      errorMessage = error.message;
    }
    return new NextResponse(JSON.stringify({ error: errorMessage }), {
      status: 500,
      headers: { 'Content-Type': 'application/json' },
    });
  }
}

/**
 * Answers CORS preflight requests for the chat stream proxy.
 *
 * Only relevant when the route is called from a different origin or with
 * headers that trigger a preflight; same-origin callers never hit it.
 */
export async function OPTIONS() {
  const corsHeaders = {
    // Any origin is allowed; restrict this in production if needed.
    'Access-Control-Allow-Origin': '*',
    'Access-Control-Allow-Methods': 'POST, OPTIONS',
    // Keep in sync with the headers the client actually sends.
    'Access-Control-Allow-Headers': 'Content-Type, Authorization',
  };

  // 204 No Content: a preflight answer carries headers only.
  return new NextResponse(null, { status: 204, headers: corsHeaders });
}

================================================
FILE: src/app/api/models/config/route.ts
================================================
import { NextResponse } from 'next/server';

// The target backend server base URL, derived from environment variable or defaulted.
const TARGET_SERVER_BASE_URL = process.env.SERVER_BASE_URL || 'http://localhost:8001';

/**
 * Proxies the model-configuration lookup to the backend service.
 *
 * Relays the backend's JSON on success, mirrors a non-2xx backend status with
 * an error message, and answers 500 with the failure message when the request
 * itself throws.
 */
export async function GET() {
  try {
    const targetUrl = `${TARGET_SERVER_BASE_URL}/models/config`;

    // Make the actual request to the backend service
    const backendResponse = await fetch(targetUrl, {
      method: 'GET',
      headers: {
        'Accept': 'application/json',
      }
    });

    // If the backend service responds with an error
    if (!backendResponse.ok) {
      return NextResponse.json(
        { error: `Backend service responded with status: ${backendResponse.status}` },
        { status: backendResponse.status }
      );
    }

    // Forward the response from the backend
    const modelConfig = await backendResponse.json();
    return NextResponse.json(modelConfig);
  } catch (error) {
    console.error('Error fetching model configurations:', error);
    // An Error instance has no enumerable properties, so serializing it
    // directly produces `{}`; send its message instead.
    const message = error instanceof Error ? error.message : String(error);
    return NextResponse.json({ error: message }, { status: 500 });
  }
}

/**
 * Answers CORS preflight requests for the model-configuration proxy.
 */
export function OPTIONS() {
  const corsHeaders = {
    'Access-Control-Allow-Origin': '*',
    'Access-Control-Allow-Methods': 'GET',
    'Access-Control-Allow-Headers': 'Content-Type, Authorization',
  };

  // 204 No Content: a preflight answer carries headers only.
  return new NextResponse(null, { status: 204, headers: corsHeaders });
}


================================================
FILE: src/app/api/wiki/projects/route.ts
================================================
import { NextResponse } from 'next/server';

// Shape of one entry in the JSON array returned by the Python backend's
// processed-projects endpoint; the GET handler in this file relays that array
// unchanged. This should match the expected structure from your Python backend.
// NOTE(review): field meanings (e.g. the unit of `submittedAt`) are defined by
// the backend and are not visible from this file — confirm there.
interface ApiProcessedProject {
  id: string;
  owner: string;
  repo: string;
  name: string;
  repo_type: string;
  submittedAt: number;
  language: string;
}
// Payload for deleting a project cache.
// The DELETE handler in this file expects this as its JSON request body and
// forwards the four fields to the backend as query parameters.
interface DeleteProjectCachePayload {
  owner: string;
  repo: string;
  repo_type: string;
  language: string;
}

/**
 * Runtime type guard for DeleteProjectCachePayload.
 *
 * Accepts only non-null objects whose `owner`, `repo`, `repo_type` and
 * `language` properties are all present and are strings containing at least
 * one non-whitespace character.
 */
function isDeleteProjectCachePayload(obj: unknown): obj is DeleteProjectCachePayload {
  if (obj == null || typeof obj !== 'object') {
    return false;
  }

  const candidate = obj as Record<string, unknown>;
  const requiredFields = ['owner', 'repo', 'repo_type', 'language'];

  return requiredFields.every((field) => {
    if (!(field in candidate)) {
      return false;
    }
    const fieldValue = candidate[field];
    return typeof fieldValue === 'string' && fieldValue.trim() !== '';
  });
}

// Base URL of the Python backend. PYTHON_BACKEND_HOST takes precedence for
// backward compatibility; SERVER_BASE_URL is the variable the other proxy
// routes read, so honoring it here keeps a single setting working for all of
// them. Falls back to the local default when neither is set.
const PYTHON_BACKEND_URL =
  process.env.PYTHON_BACKEND_HOST || process.env.SERVER_BASE_URL || 'http://localhost:8001';
// Backend endpoint listing the processed projects (used by GET).
const PROJECTS_API_ENDPOINT = `${PYTHON_BACKEND_URL}/api/processed_projects`;
// Backend endpoint addressing a project's wiki cache (used by DELETE).
const CACHE_API_ENDPOINT = `${PYTHON_BACKEND_URL}/api/wiki_cache`;

/**
 * Lists the processed projects by proxying to the Python backend.
 *
 * Relays the backend's project array on success. When the backend answers
 * with an error status, its JSON error body (or a statusText-based fallback
 * if that body is not JSON) is returned with the same status. Failing to
 * reach the backend at all yields a 503.
 */
export async function GET() {
  try {
    const backendResponse = await fetch(PROJECTS_API_ENDPOINT, {
      method: 'GET',
      headers: {
        'Content-Type': 'application/json',
        // Add any other headers your Python backend might require, e.g., API keys
      },
      cache: 'no-store', // Ensure fresh data is fetched every time
    });

    if (backendResponse.ok) {
      const projectList: ApiProcessedProject[] = await backendResponse.json();
      return NextResponse.json(projectList);
    }

    // Prefer the backend's own error payload; fall back to the status text
    // when the body cannot be parsed as JSON.
    const fallbackError = { error: `Failed to fetch from Python backend: ${backendResponse.statusText}` };
    const errorPayload = await backendResponse.json().catch(() => fallbackError);

    console.error(`Error from Python backend (${PROJECTS_API_ENDPOINT}): ${backendResponse.status} - ${JSON.stringify(errorPayload)}`);
    return NextResponse.json(errorPayload, { status: backendResponse.status });

  } catch (error: unknown) {
    console.error(`Network or other error when fetching from ${PROJECTS_API_ENDPOINT}:`, error);
    const reason = error instanceof Error ? error.message : 'An unknown error occurred';
    return NextResponse.json(
      { error: `Failed to connect to the Python backend. ${reason}` },
      { status: 503 } // Service Unavailable
    );
  }
}

/**
 * Deletes a project's wiki cache by proxying to the Python backend.
 *
 * Expects a JSON body matching DeleteProjectCachePayload. A missing, malformed
 * or incomplete body is rejected with 400. A backend error is relayed with the
 * backend's status; any other failure yields a 500.
 */
export async function DELETE(request: Request) {
  try {
    // A body that is not valid JSON is a client error, not a server error:
    // leave `body` undefined so it is rejected by the validation below.
    let body: unknown;
    try {
      body = await request.json();
    } catch {
      body = undefined;
    }

    if (!isDeleteProjectCachePayload(body)) {
      return NextResponse.json(
        { error: 'Invalid request body: owner, repo, repo_type, and language are required and must be non-empty strings.' },
        { status: 400 }
      );
    }
    const { owner, repo, repo_type, language } = body;
    // URLSearchParams takes care of encoding the values.
    const params = new URLSearchParams({ owner, repo, repo_type, language });
    const response = await fetch(`${CACHE_API_ENDPOINT}?${params}`, {
      method: 'DELETE',
      headers: { 'Content-Type': 'application/json' },
    });
    if (!response.ok) {
      let errorBody = { error: response.statusText };
      try {
        errorBody = await response.json();
      } catch {
        // Backend error body was not JSON; keep the statusText fallback.
      }
      console.error(`Error deleting project cache (${CACHE_API_ENDPOINT}): ${response.status} - ${JSON.stringify(errorBody)}`);
      return NextResponse.json(errorBody, { status: response.status });
    }
    return NextResponse.json({ message: 'Project deleted successfully' });
  } catch (error: unknown) {
    console.error('Error in DELETE /api/wiki/projects:', error);
    const message = error instanceof Error ? error.message : 'An unknown error occurred';
    return NextResponse.json({ error: `Failed to delete project: ${message}` }, { status: 500 });
  }
}

================================================
FILE: src/app/globals.css
================================================
@import "tailwindcss";

/* Define dark mode variant */
@custom-variant dark (&:where([data-theme="dark"], [data-theme="dark"] *));

:root {
  /* Japanese aesthetic color palette - light mode */
  --background: #f8f4e6; /* Warm off-white like washi paper */
  --foreground: #333333; /* Soft black for text */
  --shadow-color: rgba(0, 0, 0, 0.05);
  --accent-primary: #9b7cb9; /* Soft purple (Fuji) */
  --accent-secondary: #d7c4bb; /* Soft beige (Kinari) */
  --border-color: #e0d8c8; /* Soft beige border */
  --card-bg: #fffaf0; /* Slightly warmer than background */
  --highlight: #e8927c; /* Soft coral (Akane) */
  --muted: #a59e8c; /* Soft gray-brown (Nezumi) */
  --link-color: #7c5aa0; /* Slightly darker purple for links */
}

html[data-theme='dark'] {
  /* Japanese aesthetic color palette - dark mode */
  --background: #1a1a1a; /* Deep charcoal */
  --foreground: #f0f0f0; /* Soft white */
  --shadow-color: rgba(0, 0, 0, 0.2);
  --accent-primary: #9370db; /* Soft lavender */
  --accent-secondary: #5d4037; /* Warm brown */
  --border-color: #2c2c2c; /* Dark border */
  --card-bg: #222222; /* Slightly lighter than background */
  --highlight: #e57373; /* Soft red */
  --muted: #8c8c8c; /* Muted gray */
  --link-color: #b19cd9; /* Lighter purple for dark mode links */
}

/* Fix for unreadable <select> options in Chrome's dark mode */
[data-theme="dark"] select option {
  background: var(--background);
}

@theme inline {
  --color-background: var(--background);
  --color-foreground: var(--foreground);
  --font-sans: 'Noto Sans JP', sans-serif;
  --font-mono: 'Geist Mono', monospace;
  --font-geist-sans: 'Noto Sans JP', sans-serif;
  --font-geist-mono: 'Geist Mono', monospace;
  --font-serif-jp: 'Noto Serif JP', serif;
}

body {
  background: var(--background);
  color: var(--foreground);
  font-family: var(--font-sans), sans-serif;
}

/* Custom shadow styles - more subtle for Japanese aesthetic */
.shadow-custom {
  box-shadow: 0 4px 8px -2px var(--shadow-color);
}

/* Paper texture background */
.paper-texture {
  background-color: var(--card-bg);
  background-image: url("data:image/svg+xml,%3Csvg width='100' height='100' viewBox='0 0 100 100' xmlns='http://www.w3.org/2000/svg'%3E%3Cpath d='M11 18c3.866 0 7-3.134 7-7s-3.134-7-7-7-7 3.134-7 7 3.134 7 7 7zm48 25c3.866 0 7-3.134 7-7s-3.134-7-7-7-7 3.134-7 7 3.134 7 7 7zm-43-7c1.657 0 3-1.343 3-3s-1.343-3-3-3-3 1.343-3 3 1.343 3 3 3zm63 31c1.657 0 3-1.343 3-3s-1.343-3-3-3-3 1.343-3 3 1.343 3 3 3zM34 90c1.657 0 3-1.343 3-3s-1.343-3-3-3-3 1.343-3 3 1.343 3 3 3zm56-76c1.657 0 3-1.343 3-3s-1.343-3-3-3-3 1.343-3 3 1.343 3 3 3zM12 86c2.21 0 4-1.79 4-4s-1.79-4-4-4-4 1.79-4 4 1.79 4 4 4zm28-65c2.21 0 4-1.79 4-4s-1.79-4-4-4-4 1.79-4 4 1.79 4 4 4zm23-11c2.76 0 5-2.24 5-5s-2.24-5-5-5-5 2.24-5 5 2.24 5 5 5zm-6 60c2.21 0 4-1.79 4-4s-1.79-4-4-4-4 1.79-4 4 1.79 4 4 4zm29 22c2.76 0 5-2.24 5-5s-2.24-5-5-5-5 2.24-5 5 2.24 5 5 5zM32 63c2.76 0 5-2.24 5-5s-2.24-5-5-5-5 2.24-5 5 2.24 5 5 5zm57-13c2.76 0 5-2.24 5-5s-2.24-5-5-5-5 2.24-5 5 2.24 5 5 5zm-9-21c1.105 0 2-.895 2-2s-.895-2-2-2-2 .895-2 2 .895 2 2 2zM60 91c1.105 0 2-.895 2-2s-.895-2-2-2-2 .895-2 2 .895 2 2 2zM35 41c1.105 0 2-.895 2-2s-.895-2-2-2-2 .895-2 2 .895 2 2 2zM12 60c1.105 0 2-.895 2-2s-.895-2-2-2-2 .895-2 2 .895 2 2 2z' fill='%23e0d8c8' fill-opacity='0.1' fill-rule='evenodd'/%3E%3C/svg%3E");
}

/* Dark mode paper texture */
html[data-theme='dark'] .paper-texture {
  background-image: url("data:image/svg+xml,%3Csvg width='100' height='100' viewBox='0 0 100 100' xmlns='http://www.w3.org/2000/svg'%3E%3Cpath d='M11 18c3.866 0 7-3.134 7-7s-3.134-7-7-7-7 3.134-7 7 3.134 7 7 7zm48 25c3.866 0 7-3.134 7-7s-3.134-7-7-7-7 3.134-7 7 3.134 7 7 7zm-43-7c1.657 0 3-1.343 3-3s-1.343-3-3-3-3 1.343-3 3 1.343 3 3 3zm63 31c1.657 0 3-1.343 3-3s-1.343-3-3-3-3 1.343-3 3 1.343 3 3 3zM34 90c1.657 0 3-1.343 3-3s-1.343-3-3-3-3 1.343-3 3 1.343 3 3 3zm56-76c1.657 0 3-1.343 3-3s-1.343-3-3-3-3 1.343-3 3 1.343 3 3 3zM12 86c2.21 0 4-1.79 4-4s-1.79-4-4-4-4 1.79-4 4 1.79 4 4 4zm28-65c2.21 0 4-1.79 4-4s-1.79-4-4-4-4 1.79-4 4 1.79 4 4 4zm23-11c2.76 0 5-2.24 5-5s-2.24-5-5-5-5 2.24-5 5 2.24 5 5 5zm-6 60c2.21 0 4-1.79 4-4s-1.79-4-4-4-4 1.79-4 4 1.79 4 4 4zm29 22c2.76 0 5-2.24 5-5s-2.24-5-5-5-5 2.24-5 5 2.24 5 5 5zM32 63c2.76 0 5-2.24 5-5s-2.24-5-5-5-5 2.24-5 5 2.24 5 5 5zm57-13c2.76 0 5-2.24 5-5s-2.24-5-5-5-5 2.24-5 5 2.24 5 5 5zm-9-21c1.105 0 2-.895 2-2s-.895-2-2-2-2 .895-2 2 .895 2 2 2zM60 91c1.105 0 2-.895 2-2s-.895-2-2-2-2 .895-2 2 .895 2 2 2zM35 41c1.105 0 2-.895 2-2s-.895-2-2-2-2 .895-2 2 .895 2 2 2zM12 60c1.105 0 2-.895 2-2s-.895-2-2-2-2 .895-2 2 .895 2 2 2z' fill='%23333333' fill-opacity='0.1' fill-rule='evenodd'/%3E%3C/svg%3E");
}

/* Japanese-style buttons */
/* Accent-colored button. `position: relative` and `overflow: hidden` contain
   the absolutely positioned :before overlay defined below. */
.btn-japanese {
  background-color: var(--accent-primary);
  color: white;
  border: none;
  border-radius: 0.25rem;
  padding: 0.5rem 1.5rem;
  font-weight: 500;
  transition: all 0.3s ease;
  position: relative;
  overflow: hidden;
}

/* Hover swaps the fill to the highlight color. */
.btn-japanese:hover {
  background-color: var(--highlight);
}

/* Translucent white overlay anchored to the left edge; starts at zero width. */
.btn-japanese:before {
  content: '';
  position: absolute;
  top: 0;
  left: 0;
  width: 0;
  height: 100%;
  background-color: rgba(255, 255, 255, 0.2);
  transition: width 0.3s ease;
}

/* On hover the overlay grows to full width, animated by the width transition. */
.btn-japanese:hover:before {
  width: 100%;
}

/* Japanese-style inputs */
/* Transparent field with a themed border. */
.input-japanese {
  background-color: transparent;
  border: 1px solid var(--border-color);
  border-radius: 0.25rem;
  padding: 0.5rem 1rem;
  transition: all 0.3s ease;
}

/* Focus state: accent border plus a soft ring in place of the default outline.
   NOTE(review): the ring color rgba(155, 124, 185, 0.2) is the light-mode
   --accent-primary (#9b7cb9) hard-coded, so it does not follow the dark-mode
   accent — confirm whether that is intended. */
.input-japanese:focus {
  border-color: var(--accent-primary);
  box-shadow: 0 0 0 2px rgba(155, 124, 185, 0.2);
  outline: none;
}

/* Japanese-style cards */
/* Themed card surface; `overflow: hidden` clips children to the rounded corners. */
.card-japanese {
  background-color: var(--card-bg);
  border: 1px solid var(--border-color);
  border-radius: 0.5rem;
  overflow: hidden;
  transition: all 0.3s ease;
}

/* Hover adds a shadow in the theme's shadow color. */
.card-japanese:hover {
  box-shadow: 0 4px 12px var(--shadow-color);
}

/* Line clamp utilities */
/* Each class limits its content to N lines using the -webkit-box /
   -webkit-line-clamp technique; `overflow: hidden` hides the remainder. */

/* Clamp to one line. */
.line-clamp-1 {
  overflow: hidden;
  display: -webkit-box;
  -webkit-box-orient: vertical;
  -webkit-line-clamp: 1;
}

/* Clamp to two lines. */
.line-clamp-2 {
  overflow: hidden;
  display: -webkit-box;
  -webkit-box-orient: vertical;
  -webkit-line-clamp: 2;
}

/* Clamp to three lines. */
.line-clamp-3 {
  overflow: hidden;
  display: -webkit-box;
  -webkit-box-orient: vertical;
  -webkit-line-clamp: 3;
}


================================================
FILE: src/app/layout.tsx
================================================
import type { Metadata } from "next";
import "./globals.css";
import { ThemeProvider } from "next-themes";
import { LanguageProvider } from "@/contexts/LanguageContext";

export const metadata: Metadata = {
  title: "Deepwiki Open Source | Sheing Ng",
  description: "Created by Sheing Ng",
};

/**
 * Root layout wrapping every page.
 *
 * Loads the Geist Mono, Noto Sans JP and Noto Serif JP fonts from Google
 * Fonts and wraps `children` in the theme and language providers.
 * ThemeProvider is configured with `attribute="data-theme"`, the attribute
 * the `[data-theme="dark"]` selectors in globals.css key on.
 * NOTE(review): `suppressHydrationWarning` on <html> is presumably there
 * because the theme attribute is set on the client — confirm.
 */
export default function RootLayout({
  children
}: Readonly<{
  children: React.ReactNode;
}>) {
  return (
    <html lang="en" suppressHydrationWarning>
      <head>
        <link rel="preconnect" href="https://fonts.googleapis.com" />
        <link rel="preconnect" href="https://fonts.gstatic.com" crossOrigin="anonymous" />
        <link
          href="https://fonts.googleapis.com/css2?family=Geist+Mono&family=Noto+Sans+JP:wght@400;500;700&family=Noto+Serif+JP:wght@400;500;700&display=swap"
          rel="stylesheet"
        />
      </head>
      <body className="antialiased">
        <ThemeProvider attribute="data-theme" defaultTheme="system" enableSystem>
          <LanguageProvider>
            {children}
          </LanguageProvider>
        </ThemeProvider>
      </body>
    </html>
  );
}


================================================
FILE: src/app/page.tsx
================================================
'use client';

import React, { useState, useEffect } from 'react';
import { useRouter } from 'next/navigation';
import Link from 'next/link';
import { FaWikipediaW, FaGithub, FaCoffee, FaTwitter } from 'react-icons/fa';
import ThemeToggle from '@/components/theme-toggle';
import Mermaid from '../components/Mermaid';
import ConfigurationModal from '@/components/ConfigurationModal';
import ProcessedProjects from '@/components/ProcessedProjects';
import { extractUrlPath, extractUrlDomain } from '@/utils/urlDecoder';
import { useProcessedProjects } from '@/hooks/useProcessedProjects';

import { useLanguage } from '@/contexts/LanguageContext';

// Define the demo mermaid charts outside the component
// Mermaid flowchart source (top-down graph): a code repository feeds DeepWiki,
// which fans out to four kinds of output. The `style` lines assign each node
// its own fill and stroke color.
const DEMO_FLOW_CHART = `graph TD
  A[Code Repository] --> B[DeepWiki]
  B --> C[Architecture Diagrams]
  B --> D[Component Relationships]
  B --> E[Data Flow]
  B --> F[Process Workflows]

  style A fill:#f9d3a9,stroke:#d86c1f
  style B fill:#d4a9f9,stroke:#6c1fd8
  style C fill:#a9f9d3,stroke:#1fd86c
  style D fill:#a9d3f9,stroke:#1f6cd8
  style E fill:#f9a9d3,stroke:#d81f6c
  style F fill:#d3f9a9,stroke:#6cd81f`;

// Mermaid sequence-diagram source: the User / DeepWiki / GitHub exchange from
// entering a repository URL to displaying the generated wiki, followed by a
// note spanning all participants.
const DEMO_SEQUENCE_CHART = `sequenceDiagram
  participant User
  participant DeepWiki
  participant GitHub

  User->>DeepWiki: Enter repository URL
  DeepWiki->>GitHub: Request repository data
  GitHub-->>DeepWiki: Return repository data
  DeepWiki->>DeepWiki: Process and analyze code
  DeepWiki-->>User: Display wiki with diagrams

  %% Add a note to make text more visible
  Note over User,GitHub: DeepWiki supports sequence diagrams for visualizing interactions`;

export default function Home() {
  const router = useRouter();
  const { language, setLanguage, messages, supportedLanguages } = useLanguage();
  const { projects, isLoading: projectsLoading } = useProcessedProjects();

  // Simple translation function.
  // Resolves a dot-separated key (e.g. 'common.appName') against the current
  // `messages` object and substitutes `{name}` placeholders from `params`.
  // Returns the key itself when the path does not exist or does not resolve
  // to a string.
  const t = (key: string, params: Record<string, string | number> = {}): string => {
    // Split the key by dots to access nested properties
    const keys = key.split('.');
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    let value: any = messages;

    // Navigate through the nested properties
    for (const k of keys) {
      if (value && typeof value === 'object' && k in value) {
        value = value[k];
      } else {
        // Return the key if the translation is not found
        return key;
      }
    }

    // Return the key if the value is not a string
    if (typeof value !== 'string') {
      return key;
    }

    // Replace every occurrence of each placeholder. split/join is used instead
    // of String.replace so repeated placeholders are all substituted and `$`
    // sequences in a parameter value are inserted literally.
    return Object.entries(params).reduce((acc: string, [paramKey, paramValue]) => {
      return acc.split(`{${paramKey}}`).join(String(paramValue));
    }, value);
  };

  const [repositoryInput, setRepositoryInput] = useState('https://github.com/AsyncFuncAI/deepwiki-open');

  const REPO_CONFIG_CACHE_KEY = 'deepwikiRepoConfigCache';

  // Restores the form settings previously saved in localStorage for the given
  // repository URL (trimmed before lookup). Does nothing when the URL is empty
  // or no entry exists; read/parse failures are logged and otherwise ignored.
  const loadConfigFromCache = (repoUrl: string) => {
    if (!repoUrl) return;

    try {
      const rawCache = localStorage.getItem(REPO_CONFIG_CACHE_KEY);
      if (!rawCache) return;

      const savedConfig = JSON.parse(rawCache)[repoUrl.trim()];
      if (!savedConfig) return;

      // Apply each stored field, falling back to its default when absent.
      setSelectedLanguage(savedConfig.selectedLanguage || language);
      // Only an explicitly stored value overrides the comprehensive default.
      setIsComprehensiveView(
        savedConfig.isComprehensiveView === undefined ? true : savedConfig.isComprehensiveView
      );
      setProvider(savedConfig.provider || '');
      setModel(savedConfig.model || '');
      setIsCustomModel(savedConfig.isCustomModel || false);
      setCustomModel(savedConfig.customModel || '');
      setSelectedPlatform(savedConfig.selectedPlatform || 'github');
      setExcludedDirs(savedConfig.excludedDirs || '');
      setExcludedFiles(savedConfig.excludedFiles || '');
      setIncludedDirs(savedConfig.includedDirs || '');
      setIncludedFiles(savedConfig.includedFiles || '');
    } catch (error) {
      console.error('Error loading config from localStorage:', error);
    }
  };

  // Keeps the repository input state in sync with the text field and, for a
  // non-blank value, restores any settings cached for that URL.
  const handleRepositoryInputChange = (e: React.ChangeEvent<HTMLInputElement>) => {
    const typedValue = e.target.value;
    setRepositoryInput(typedValue);

    // A cleared input leaves the current settings untouched.
    if (typedValue.trim() !== "") {
      loadConfigFromCache(typedValue);
    }
  };

  useEffect(() => {
    if (repositoryInput) {
      loadConfigFromCache(repositoryInput);
    }
  }, []);

  // Provider-based model selection state
  const [provider, setProvider] = useState<string>('');
  const [model, setModel] = useState<string>('');
  const [isCustomModel, setIsCustomModel] = useState<boolean>(false);
  const [customModel, setCustomModel] = useState<string>('');

  // Wiki type state - default to comprehensive view
  const [isComprehensiveView, setIsComprehensiveView] = useState<boolean>(true);

  const [excludedDirs, setExcludedDirs] = useState('');
  const [excludedFiles, setExcludedFiles] = useState('');
  const [includedDirs, setIncludedDirs] = useState('');
  const [includedFiles, setIncludedFiles] = useState('');
  const [selectedPlatform, setSelectedPlatform] = useState<'github' | 'gitlab' | 'bitbucket'>('github');
  const [accessToken, setAccessToken] = useState('');
  const [error, setError] = useState<string | null>(null);
  const [isSubmitting, setIsSubmitting] = useState(false);
  const [selectedLanguage, setSelectedLanguage] = useState<string>(language);

  // Authentication state
  const [authRequired, setAuthRequired] = useState<boolean>(false);
  const [authCode, setAuthCode] = useState<string>('');
  const [isAuthLoading, setIsAuthLoading] = useState<boolean>(true);

  // Sync the language context with the selectedLanguage state
  useEffect(() => {
    setLanguage(selectedLanguage);
  }, [selectedLanguage, setLanguage]);

  // Fetch authentication status on component mount
  useEffect(() => {
    const fetchAuthStatus = async () => {
      try {
        setIsAuthLoading(true);
        const response = await fetch('/api/auth/status');
        if (!response.ok) {
          throw new Error(`HTTP error! status: ${response.status}`);
        }
        const data = await response.json();
        setAuthRequired(data.auth_required);
      } catch (err) {
        console.error("Failed to fetch auth status:", err);
        // Assuming auth is required if fetch fails to avoid blocking UI for safety
        setAuthRequired(true);
      } finally {
        setIsAuthLoading(false);
      }
    };

    fetchAuthStatus();
  }, []);

  // Parse repository URL/input and extract owner and repo.
  //
  // Recognizes, in this order:
  //   1. Windows absolute paths  -> type 'local', owner 'local'
  //   2. Paths starting with '/' -> type 'local', owner 'local'
  //   3. Inputs matching customGitRegex -> type derived from the domain
  //      ('github', 'gitlab', 'bitbucket', otherwise 'web')
  // Returns null for anything else, or when owner/repo cannot be determined.
  //
  // NOTE(review): customGitRegex requires at least three slash-separated
  // segments (host/owner/repo), so a bare "owner/repo" does not match even
  // though the error message shown to the user lists it as a valid format —
  // confirm which of the two is intended.
  const parseRepositoryInput = (input: string): {
    owner: string,
    repo: string,
    type: string,
    fullPath?: string,
    localPath?: string
  } | null => {
    input = input.trim();

    let owner = '', repo = '', type = 'github', fullPath;
    let localPath: string | undefined;

    // Handle Windows absolute paths (e.g., C:\path\to\folder)
    const windowsPathRegex = /^[a-zA-Z]:\\(?:[^\\/:*?"<>|\r\n]+\\)*[^\\/:*?"<>|\r\n]*$/;
    // Optional scheme, then host, then a path of at least two segments;
    // an optional ".git" suffix and trailing slash are tolerated.
    const customGitRegex = /^(?:https?:\/\/)?([^\/]+)\/(.+?)\/([^\/]+)(?:\.git)?\/?$/;

    if (windowsPathRegex.test(input)) {
      type = 'local';
      localPath = input;
      // Last path component names the repo; a trailing backslash yields an
      // empty component, hence the 'local-repo' fallback.
      repo = input.split('\\').pop() || 'local-repo';
      owner = 'local';
    }
    // Handle Unix/Linux absolute paths (e.g., /path/to/folder)
    else if (input.startsWith('/')) {
      type = 'local';
      localPath = input;
      // filter(Boolean) drops empty components from leading/trailing slashes.
      repo = input.split('/').filter(Boolean).pop() || 'local-repo';
      owner = 'local';
    }
    else if (customGitRegex.test(input)) {
      // Detect repository type based on domain
      const domain = extractUrlDomain(input);
      if (domain?.includes('github.com')) {
        type = 'github';
      } else if (domain?.includes('gitlab.com') || domain?.includes('gitlab.')) {
        type = 'gitlab';
      } else if (domain?.includes('bitbucket.org') || domain?.includes('bitbucket.')) {
        type = 'bitbucket';
      } else {
        type = 'web'; // fallback for other git hosting services
      }

      fullPath = extractUrlPath(input)?.replace(/\.git$/, '');
      const parts = fullPath?.split('/') ?? [];
      // The last two path components are taken as owner and repo.
      if (parts.length >= 2) {
        repo = parts[parts.length - 1] || '';
        owner = parts[parts.length - 2] || '';
      }
    }
    // Unsupported URL formats
    else {
      console.error('Unsupported URL format:', input);
      return null;
    }

    // Either part missing means the input could not be resolved.
    if (!owner || !repo) {
      return null;
    }

    // Clean values
    owner = owner.trim();
    repo = repo.trim();

    // Remove .git suffix if present
    if (repo.endsWith('.git')) {
      repo = repo.slice(0, -4);
    }

    return { owner, repo, type, fullPath, localPath };
  };

  // State for configuration modal
  const [isConfigModalOpen, setIsConfigModalOpen] = useState(false);

  // Form submit handler: validates the repository input and, when it parses,
  // clears any previous error and opens the configuration modal.
  const handleFormSubmit = (e: React.FormEvent) => {
    e.preventDefault();

    const isValidRepo = parseRepositoryInput(repositoryInput) !== null;

    if (isValidRepo) {
      setError(null);
      setIsConfigModalOpen(true);
      return;
    }

    setError('Invalid repository format. Use "owner/repo", GitHub/GitLab/BitBucket URL, or a local folder path like "/path/to/folder" or "C:\\path\\to\\folder".');
  };

  // Checks the entered authorization code against the backend.
  // Resolves to true when no authorization is required, to false when the code
  // is missing, the request fails or the backend rejects it, and otherwise to
  // the backend's `success` flag.
  const validateAuthCode = async () => {
    if (!authRequired) {
      return true;
    }
    if (!authCode) {
      return false;
    }

    try {
      const validationResponse = await fetch('/api/auth/validate', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({'code': authCode})
      });

      if (validationResponse.ok) {
        const result = await validationResponse.json();
        return result.success || false;
      }
      return false;
    } catch {
      // Network or parsing failure counts as a failed validation.
      return false;
    }
  };

  // Validates the authorization code, saves the current settings for this
  // repository to localStorage, and navigates to the wiki page for the parsed
  // owner/repo with all settings passed as query parameters.
  const handleGenerateWiki = async () => {

    // Prevent multiple submissions. Checked first so a duplicate click does
    // not trigger another validation request.
    if (isSubmitting) {
      console.log('Form submission already in progress, ignoring duplicate click');
      return;
    }

    // Check authorization code
    const validation = await validateAuthCode();
    if(!validation) {
      setError(`Failed to validate the authorization code`);
      console.error(`Failed to validate the authorization code`);
      setIsConfigModalOpen(false);
      return;
    }

    // Use the trimmed input everywhere (cache key, parsing and the forwarded
    // repo_url) so stray whitespace from a pasted URL is never sent along.
    const currentRepoUrl = repositoryInput.trim();

    try {
      if (currentRepoUrl) {
        const existingConfigs = JSON.parse(localStorage.getItem(REPO_CONFIG_CACHE_KEY) || '{}');
        const configToSave = {
          selectedLanguage,
          isComprehensiveView,
          provider,
          model,
          isCustomModel,
          customModel,
          selectedPlatform,
          excludedDirs,
          excludedFiles,
          includedDirs,
          includedFiles,
        };
        existingConfigs[currentRepoUrl] = configToSave;
        localStorage.setItem(REPO_CONFIG_CACHE_KEY, JSON.stringify(existingConfigs));
      }
    } catch (error) {
      // Saving the settings is best-effort; generation proceeds regardless.
      console.error('Error saving config to localStorage:', error);
    }

    setIsSubmitting(true);

    // Parse repository input
    const parsedRepo = parseRepositoryInput(currentRepoUrl);

    if (!parsedRepo) {
      setError('Invalid repository format. Use "owner/repo", GitHub/GitLab/BitBucket URL, or a local folder path like "/path/to/folder" or "C:\\path\\to\\folder".');
      setIsSubmitting(false);
      return;
    }

    const { owner, repo, type, localPath } = parsedRepo;

    // Store tokens in query params if they exist
    const params = new URLSearchParams();
    if (accessToken) {
      params.append('token', accessToken);
    }
    // Always include the type parameter
    params.append('type', (type == 'local' ? type : selectedPlatform) || 'github');
    // Add local path if it exists
    // NOTE(review): URLSearchParams encodes values itself, so these two values
    // end up encoded twice. Kept as-is because the receiving page presumably
    // decodes them explicitly — confirm before changing.
    if (localPath) {
      params.append('local_path', encodeURIComponent(localPath));
    } else {
      params.append('repo_url', encodeURIComponent(currentRepoUrl));
    }
    // Add model parameters
    params.append('provider', provider);
    params.append('model', model);
    if (isCustomModel && customModel) {
      params.append('custom_model', customModel);
    }
    // Add file filters configuration
    if (excludedDirs) {
      params.append('excluded_dirs', excludedDirs);
    }
    if (excludedFiles) {
      params.append('excluded_files', excludedFiles);
    }
    if (includedDirs) {
      params.append('included_dirs', includedDirs);
    }
    if (includedFiles) {
      params.append('included_files', includedFiles);
    }

    // Add language parameter
    params.append('language', selectedLanguage);

    // Add comprehensive parameter
    params.append('comprehensive', isComprehensiveView.toString());

    const queryString = params.toString() ? `?${params.toString()}` : '';

    // Navigate to the dynamic route
    router.push(`/${owner}/${repo}${queryString}`);

    // The isSubmitting state will be reset when the component unmounts during navigation
  };

  return (
    <div className="h-screen paper-texture p-4 md:p-8 flex flex-col">
      <header className="max-w-6xl mx-auto mb-6 h-fit w-full">
        <div
          className="flex flex-col md:flex-row md:items-center md:justify-between gap-4 bg-[var(--card-bg)] rounded-lg shadow-custom border border-[var(--border-color)] p-4">
          <div className="flex items-center">
            <div className="bg-[var(--accent-primary)] p-2 rounded-lg mr-3">
              <FaWikipediaW className="text-2xl text-white" />
            </div>
            <div className="mr-6">
              <h1 className="text-xl md:text-2xl font-bold text-[var(--accent-primary)]">{t('common.appName')}</h1>
              <div className="flex flex-wrap items-baseline gap-x-2 md:gap-x-3 mt-0.5">
                <p className="text-xs text-[var(--muted)] whitespace-nowrap">{t('common.tagline')}</p>
                <div className="hidden md:inline-block">
                  <Link href="/wiki/projects"
                    className="text-xs font-medium text-[var(--accent-primary)] hover:text-[var(--highlight)] hover:underline whitespace-nowrap">
                    {t('nav.wikiProjects')}
                  </Link>
                </div>
              </div>
            </div>
          </div>

          <form onSubmit={handleFormSubmit} className="flex flex-col gap-3 w-full max-w-3xl">
            {/* Repository URL input and submit button */}
            <div className="flex flex-col sm:flex-row gap-2">
              <div className="relative flex-1">
                <input
                  type="text"
                  value={repositoryInput}
                  onChange={handleRepositoryInputChange}
                  placeholder={t('form.repoPlaceholder') || "owner/repo, GitHub/GitLab/BitBucket URL, or local folder path"}
                  className="input-japanese block w-full pl-10 pr-3 py-2.5 border-[var(--border-color)] rounded-lg bg-transparent text-[var(--foreground)] focus:outline-none focus:border-[var(--accent-primary)]"
                />
                {error && (
                  <div className="text-[var(--highlight)] text-xs mt-1">
                    {error}
                  </div>
                )}
              </div>
              <button
                type="submit"
                className="btn-japanese px-6 py-2.5 rounded-lg disabled:opacity-50 disabled:cursor-not-allowed"
                disabled={isSubmitting}
              >
                {isSubmitting ? t('common.processing') : t('common.generateWiki')}
              </button>
            </div>
          </form>

          {/* Configuration Modal */}
          <ConfigurationModal
            isOpen={isConfigModalOpen}
            onClose={() => setIsConfigModalOpen(false)}
            repositoryInput={repositoryInput}
            selectedLanguage={selectedLanguage}
            setSelectedLanguage={setSelectedLanguage}
            supportedLanguages={supportedLanguages}
            isComprehensiveView={isComprehensiveView}
            setIsComprehensiveView={setIsComprehensiveView}
            provider={provider}
            setProvider={setProvider}
            model={model}
            setModel={setModel}
            isCustomModel={isCustomModel}
            setIsCustomModel={setIsCustomModel}
            customModel={customModel}
            setCustomModel={setCustomModel}
            selectedPlatform={selectedPlatform}
            setSelectedPlatform={setSelectedPlatform}
            accessToken={accessToken}
            setAccessToken={setAccessToken}
            excludedDirs={excludedDirs}
            setExcludedDirs={setExcludedDirs}
            excludedFiles={excludedFiles}
            setExcludedFiles={setExcludedFiles}
            includedDirs={includedDirs}
            setIncludedDirs={setIncludedDirs}
            includedFiles={includedFiles}
            setIncludedFiles={setIncludedFiles}
            onSubmit={handleGenerateWiki}
            isSubmitting={isSubmitting}
            authRequired={authRequired}
            authCode={authCode}
            setAuthCode={setAuthCode}
            isAuthLoading={isAuthLoading}
          />

        </div>
      </header>

      <main className="flex-1 max-w-6xl mx-auto w-full overflow-y-auto">
        <div
          className="min-h-full flex flex-col items-center p-8 pt-10 bg-[var(--card-bg)] rounded-lg shadow-custom card-japanese">

          {/* Conditionally show processed projects or welcome content */}
          {!projectsLoading && projects.length > 0 ? (
            <div className="w-full">
              {/* Header section for existing projects */}
              <div className="flex flex-col items-center w-full max-w-2xl mb-8 mx-auto">
                <div className="flex flex-col sm:flex-row items-center mb-6 gap-4">
                  <div className="relative">
                    <div className="absolute -inset-1 bg-[var(--accent-primary)]/20 rounded-full blur-md"></div>
                    <FaWikipediaW className="text-5xl text-[var(--accent-primary)] relative z-10" />
                  </div>
                  <div className="text-center sm:text-left">
                    <h2 className="text-2xl font-bold text-[var(--foreground)] font-serif mb-1">{t('projects.existingProjects')}</h2>
                    <p className="text-[var(--accent-primary)] text-sm max-w-md">{t('projects.browseExisting')}</p>
                  </div>
                </div>
              </div>

              {/* Show processed projects */}
              <ProcessedProjects
                showHeader={false}
                maxItems={6}
                messages={messages}
                className="w-full"
              />
            </div>
          ) : (
            <>
              {/* Header section */}
              <div className="flex flex-col items-center w-full max-w-2xl mb-8">
                <div className="flex flex-col sm:flex-row items-center mb-6 gap-4">
                  <div className="relative">
                    <div className="absolute -inset-1 bg-[var(--accent-primary)]/20 rounded-full blur-md"></div>
                    <FaWikipediaW className="text-5xl text-[var(--accent-primary)] relative z-10" />
                  </div>
                  <div className="text-center sm:text-left">
                    <h2 className="text-2xl font-bold text-[var(--foreground)] font-serif mb-1">{t('home.welcome')}</h2>
                    <p className="text-[var(--accent-primary)] text-sm max-w-md">{t('home.welcomeTagline')}</p>
                  </div>
                </div>

                <p className="text-[var(--foreground)] text-center mb-8 text-lg leading-relaxed">
                  {t('home.description')}
                </p>
              </div>

          {/* Quick Start section - redesigned for better spacing */}
          <div
            className="w-full max-w-2xl mb-10 bg-[var(--accent-primary)]/5 border border-[var(--accent-primary)]/20 rounded-lg p-5">
            <h3 className="text-sm font-semibold text-[var(--accent-primary)] mb-3 flex items-center">
              <svg xmlns="http://www.w3.org/2000/svg" className="h-4 w-4 mr-2" fill="none" viewBox="0 0 24 24"
                stroke="currentColor">
                <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2}
                  d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
              </svg>
              {t('home.quickStart')}
            </h3>
            <p className="text-sm text-[var(--foreground)] mb-3">{t('home.enterRepoUrl')}</p>
            <div className="grid grid-cols-1 gap-3 text-xs text-[var(--muted)]">
              <div
                className="bg-[var(--background)]/70 p-3 rounded border border-[var(--border-color)] font-mono overflow-x-hidden whitespace-nowrap"
              >https://github.com/AsyncFuncAI/deepwiki-open
              </div>
              <div
                className="bg-[var(--background)]/70 p-3 rounded border border-[var(--border-color)] font-mono overflow-x-hidden whitespace-nowrap"
              >https://gitlab.com/gitlab-org/gitlab
              </div>
              <div
                className="bg-[var(--background)]/70 p-3 rounded border border-[var(--border-color)] font-mono overflow-x-hidden whitespace-nowrap"
              >AsyncFuncAI/deepwiki-open
              </div>
              <div
                className="bg-[var(--background)]/70 p-3 rounded border border-[var(--border-color)] font-mono overflow-x-hidden whitespace-nowrap"
              >https://bitbucket.org/atlassian/atlaskit
              </div>
            </div>
          </div>

          {/* Visualization section - improved for better visibility */}
          <div
            className="w-full max-w-2xl mb-8 bg-[var(--background)]/70 rounded-lg p-6 border border-[var(--border-color)]">
            <div className="flex flex-col sm:flex-row items-start sm:items-center gap-2 mb-4">
              <svg xmlns="http://www.w3.org/2000/svg"
                className="h-5 w-5 text-[var(--accent-primary)] flex-shrink-0 mt-0.5 sm:mt-0" fill="none"
                viewBox="0 0 24 24" stroke="currentColor">
                <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2}
                  d="M9 12l2 2 4-4m5.618-4.016A11.955 11.955 0 0112 2.944a11.955 11.955 0 01-8.618 3.04A12.02 12.02 0 003 9c0 5.591 3.824 10.29 9 11.622 5.176-1.332 9-6.03 9-11.622 0-1.042-.133-2.052-.382-3.016z" />
              </svg>
              <h3 className="text-base font-semibold text-[var(--foreground)] font-serif">{t('home.advancedVisualization')}</h3>
            </div>
            <p className="text-sm text-[var(--foreground)] mb-5 leading-relaxed">
              {t('home.diagramDescription')}
            </p>

            {/* Diagrams with improved layout */}
            <div className="grid grid-cols-1 gap-6">
              <div className="bg-[var(--card-bg)] p-4 rounded-lg border border-[var(--border-color)] shadow-custom">
                <h4 className="text-sm font-medium text-[var(--foreground)] mb-3 font-serif">{t('home.flowDiagram')}</h4>
                <Mermaid chart={DEMO_FLOW_CHART} />
              </div>

              <div className="bg-[var(--card-bg)] p-4 rounded-lg border border-[var(--border-color)] shadow-custom">
                <h4 className="text-sm font-medium text-[var(--foreground)] mb-3 font-serif">{t('home.sequenceDiagram')}</h4>
                <Mermaid chart={DEMO_SEQUENCE_CHART} />
              </div>
            </div>
          </div>
            </>
          )}
        </div>
      </main>

      <footer className="max-w-6xl mx-auto mt-8 flex flex-col gap-4 w-full">
        <div
          className="flex flex-col sm:flex-row justify-between items-center gap-4 bg-[var(--card-bg)] rounded-lg p-4 border border-[var(--border-color)] shadow-custom">
          <p className="text-[var(--muted)] text-sm font-serif">{t('footer.copyright')}</p>

          <div className="flex items-center gap-6">
            <div className="flex items-center space-x-5">
              <a href="https://github.com/AsyncFuncAI/deepwiki-open" target="_blank" rel="noopener noreferrer"
                className="text-[var(--muted)] hover:text-[var(--accent-primary)] transition-colors">
                <FaGithub className="text-xl" />
              </a>
              <a href="https://buymeacoffee.com/sheing" target="_blank" rel="noopener noreferrer"
                className="text-[var(--muted)] hover:text-[var(--accent-primary)] transition-colors">
                <FaCoffee className="text-xl" />
              </a>
              <a href="https://x.com/sashimikun_void" target="_blank" rel="noopener noreferrer"
                className="text-[var(--muted)] hover:text-[var(--accent-primary)] transition-colors">
                <FaTwitter className="text-xl" />
              </a>
            </div>
            <ThemeToggle />
          </div>
        </div>
      </footer>
    </div>
  );
}

================================================
FILE: src/app/wiki/projects/page.tsx
================================================
'use client';

import React from 'react';
import ProcessedProjects from '@/components/ProcessedProjects';
import { useLanguage } from '@/contexts/LanguageContext';

/**
 * Page component that renders the ProcessedProjects list, with its header
 * enabled, inside a padded container.
 *
 * The translation messages are read from the language context and passed
 * straight through to ProcessedProjects.
 */
export default function WikiProjectsPage() {
  const languageContext = useLanguage();

  return (
    <div className="container mx-auto p-4">
      <ProcessedProjects showHeader messages={languageContext.messages} className="" />
    </div>
  );
}

================================================
FILE: src/components/Ask.tsx
================================================
'use client';

import React, {useState, useRef, useEffect} from 'react';
import {FaChevronLeft, FaChevronRight } from 'react-icons/fa';
import Markdown from './Markdown';
import { useLanguage } from '@/contexts/LanguageContext';
import RepoInfo from '@/types/repoinfo';
import getRepoUrl from '@/utils/getRepoUrl';
import ModelSelectionModal from './ModelSelectionModal';
import { createChatWebSocket, closeWebSocket, ChatCompletionRequest } from '@/utils/websocketClient';

/** A single model entry inside a provider's `models` list. */
interface Model {
  id: string; // identifier; this is the value stored as the selected model
  name: string; // presumably a human-readable label — not read in the visible code
}

/** A model provider entry as read from the `/api/models/config` response. */
interface Provider {
  id: string; // compared against the response's `defaultProvider`
  name: string;
  models: Model[]; // the first entry is used as the provider's default model
  supportsCustomModel?: boolean; // optional; not read in the visible part of this file
}

/**
 * One turn of the conversation history sent with each chat request.
 * The type allows 'system', but the visible request builders cast roles to
 * 'user' | 'assistant'.
 */
interface Message {
  role: 'user' | 'assistant' | 'system';
  content: string;
}

/** A deep-research response captured for stage-by-stage navigation. */
interface ResearchStage {
  title: string; // 'Research Plan', 'Research Update <n>' or 'Final Conclusion'
  content: string; // the full response text in which the stage heading was found
  iteration: number; // research iteration the stage was extracted for
  type: 'plan' | 'update' | 'conclusion';
}

/** Props accepted by the Ask component. */
interface AskProps {
  repoInfo: RepoInfo; // repository being queried; its type and optional token go into requests
  provider?: string; // defaults to ''; when empty, defaults are fetched from /api/models/config
  model?: string; // defaults to ''; same fallback as `provider`
  isCustomModel?: boolean; // initial value of the custom-model toggle (default false)
  customModel?: string; // initial custom model name (default '')
  language?: string; // defaults to 'en'; forwarded in chat requests
  onRef?: (ref: { clearConversation: () => void }) => void; // receives a handle exposing clearConversation
}

const Ask: React.FC<AskProps> = ({
  repoInfo,
  provider = '',
  model = '',
  isCustomModel = false,
  customModel = '',
  language = 'en',
  onRef
}) => {
  const [question, setQuestion] = useState('');
  const [response, setResponse] = useState('');
  const [isLoading, setIsLoading] = useState(false);
  const [deepResearch, setDeepResearch] = useState(false);

  // Model selection state
  const [selectedProvider, setSelectedProvider] = useState(provider);
  const [selectedModel, setSelectedModel] = useState(model);
  const [isCustomSelectedModel, setIsCustomSelectedModel] = useState(isCustomModel);
  const [customSelectedModel, setCustomSelectedModel] = useState(customModel);
  const [isModelSelectionModalOpen, setIsModelSelectionModalOpen] = useState(false);
  const [isComprehensiveView, setIsComprehensiveView] = useState(true);

  // Get language context for translations
  const { messages } = useLanguage();

  // Research navigation state
  const [researchStages, setResearchStages] = useState<ResearchStage[]>([]);
  const [currentStageIndex, setCurrentStageIndex] = useState(0);
  const [conversationHistory, setConversationHistory] = useState<Message[]>([]);
  const [researchIteration, setResearchIteration] = useState(0);
  const [researchComplete, setResearchComplete] = useState(false);
  const inputRef = useRef<HTMLInputElement>(null);
  const responseRef = useRef<HTMLDivElement>(null);
  const providerRef = useRef(provider);
  const modelRef = useRef(model);

  // Focus input on component mount
  useEffect(() => {
    if (inputRef.current) {
      inputRef.current.focus();
    }
  }, []);

  // Expose clearConversation method to parent component
  useEffect(() => {
    if (onRef) {
      onRef({ clearConversation });
    }
  }, [onRef]);

  // Scroll to bottom of response when it changes
  useEffect(() => {
    if (responseRef.current) {
      responseRef.current.scrollTop = responseRef.current.scrollHeight;
    }
  }, [response]);

  // Close WebSocket when component unmounts
  useEffect(() => {
    return () => {
      closeWebSocket(webSocketRef.current);
    };
  }, []);

  // Mirror the latest provider/model props into refs so that asynchronous code
  // (fetchModel below) can read the current values after its awaits.
  useEffect(() => {
    providerRef.current = provider;
    modelRef.current = model;
  }, [provider, model]);

  // When no provider or model was supplied via props, load the model
  // configuration from the backend and select its defaults.
  useEffect(() => {
    const fetchModel = async () => {
      try {
        // Reuses the component-wide loading flag while the configuration loads
        setIsLoading(true);

        const response = await fetch('/api/models/config');
        if (!response.ok) {
          throw new Error(`Error fetching model configurations: ${response.status}`);
        }

        const data = await response.json();

        // Check the refs rather than the props: the props may have changed
        // while the request was in flight, and the refs hold the latest values
        if(providerRef.current == '' || modelRef.current== '') {
          setSelectedProvider(data.defaultProvider);

          // Find the default provider and set its default model
          const selectedProvider = data.providers.find((p:Provider) => p.id === data.defaultProvider);
          if (selectedProvider && selectedProvider.models.length > 0) {
            setSelectedModel(selectedProvider.models[0].id);
          }
        } else {
          // Both values arrived via props in the meantime; adopt them as-is
          setSelectedProvider(providerRef.current);
          setSelectedModel(modelRef.current);
        }
      } catch (err) {
        // Failures are only logged; the current selection is left untouched
        console.error('Failed to fetch model configurations:', err);
      } finally {
        setIsLoading(false);
      }
    };
    // Only hit the backend when at least one of provider/model is missing
    if(provider == '' || model == '') {
      fetchModel()
    }
  }, [provider, model]);

  /**
   * Reset the question, response, conversation history and all deep-research
   * bookkeeping to their initial values, then move focus back to the input.
   */
  const clearConversation = () => {
    // Deep-research bookkeeping
    setResearchStages([]);
    setCurrentStageIndex(0);
    setResearchIteration(0);
    setResearchComplete(false);

    // Conversation content
    setConversationHistory([]);
    setResponse('');
    setQuestion('');

    inputRef.current?.focus();
  };
  /**
   * Offer the current response text as a Markdown file download.
   *
   * The file is named `response-<timestamp>.md`, where the timestamp is the
   * ISO date-time truncated to seconds with ':' replaced by '-'.
   */
  const downloadresponse = () => {
    const markdownBlob = new Blob([response], { type: 'text/markdown' });
    const objectUrl = URL.createObjectURL(markdownBlob);

    const link = document.createElement('a');
    link.href = objectUrl;
    const stamp = new Date().toISOString().slice(0, 19).replace(/:/g, '-');
    link.download = `response-${stamp}.md`;

    // The anchor is attached only for the duration of the synthetic click
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);

    URL.revokeObjectURL(objectUrl);
  };

  /**
   * Heuristically decide, from the response text alone, whether a deep-research
   * run has reached its final answer.
   *
   * @param content - full response text to inspect
   * @returns true when any of the completion heuristics below matches
   */
  const checkIfResearchComplete = (content: string): boolean => {
    const has = (needle: string): boolean => content.includes(needle);

    // An explicit final-conclusion heading always counts as complete
    if (has('## Final Conclusion')) return true;

    // A conclusion/summary heading counts unless the text announces more work
    const hasWrapUpHeading = has('## Conclusion') || has('## Summary');
    const announcesMoreWork =
      has('I will now proceed to') || has('Next Steps') || has('next iteration');
    if (hasWrapUpHeading && !announcesMoreWork) return true;

    // Phrases that explicitly indicate completion
    const completionPhrases = [
      'This concludes our research',
      'This completes our investigation',
      'This concludes the deep research process',
      'Key Findings and Implementation Details',
      'In conclusion,',
    ];
    const statesCompletion = completionPhrases.some(phrase => has(phrase));
    if (statesCompletion || (has('Final') && has('Conclusion'))) return true;

    // Topic-specific indicator: a Dockerfile write-up that points to no further steps
    const describesDockerfile =
      has('Dockerfile') && (has('This Dockerfile') || has('The Dockerfile'));
    const pointsToFurtherSteps = has('Next Steps') || has('In the next iteration');
    return describesDockerfile && !pointsToFurtherSteps;
  };

  /**
   * Classify a deep-research response as a plan, a numbered update or the
   * final conclusion, based on the headings it contains.
   *
   * Checks run in that order and the first match wins. The returned stage
   * always carries the full response text as its content.
   *
   * @param content - full response text received so far
   * @param iteration - research iteration the response belongs to
   * @returns the detected stage, or null when no known heading is present
   */
  const extractResearchStage = (content: string, iteration: number): ResearchStage | null => {
    const buildStage = (
      title: string,
      stageIteration: number,
      type: ResearchStage['type']
    ): ResearchStage => ({ title, content, iteration: stageIteration, type });

    // The research plan is only looked for in the first iteration
    const isFirstIteration = iteration === 1;
    if (
      isFirstIteration &&
      content.includes('## Research Plan') &&
      /## Research Plan([\s\S]*?)(?:## Next Steps|$)/.test(content)
    ) {
      return buildStage('Research Plan', 1, 'plan');
    }

    // Numbered research updates are recognised for iterations 1 through 4
    const inUpdateRange = iteration >= 1 && iteration <= 4;
    if (inUpdateRange) {
      const updatePattern = new RegExp(`## Research Update ${iteration}([\\s\\S]*?)(?:## Next Steps|$)`);
      if (updatePattern.test(content)) {
        return buildStage(`Research Update ${iteration}`, iteration, 'update');
      }
    }

    // The final conclusion is accepted at any iteration
    if (content.includes('## Final Conclusion') && /## Final Conclusion([\s\S]*?)$/.test(content)) {
      return buildStage('Final Conclusion', iteration, 'conclusion');
    }

    return null;
  };

  /**
   * Show the research stage at the given position.
   * Indices outside the current researchStages array are ignored.
   */
  const navigateToStage = (index: number) => {
    const inRange = index >= 0 && index < researchStages.length;
    if (!inRange) return;

    setCurrentStageIndex(index);
    setResponse(researchStages[index].content);
  };

  /** Step forward one research stage; does nothing when already on the last one. */
  const navigateToNextStage = () => {
    const hasNext = currentStageIndex < researchStages.length - 1;
    if (!hasNext) return;

    navigateToStage(currentStageIndex + 1);
  };

  /** Step back one research stage; does nothing when already on the first one. */
  const navigateToPreviousStage = () => {
    const hasPrevious = currentStageIndex > 0;
    if (!hasPrevious) return;

    navigateToStage(currentStageIndex - 1);
  };

  // WebSocket reference
  const webSocketRef = useRef<WebSocket | null>(null);

  /**
   * Run the next deep-research iteration automatically.
   *
   * Appends the current response (as the assistant turn) plus a fixed
   * "[DEEP RESEARCH] Continue the research" user turn to the conversation
   * history, bumps the iteration counter, clears the displayed response and
   * streams the next answer over a fresh WebSocket. When the socket closes,
   * the accumulated text is checked for completion; from iteration 5 onwards
   * completion is forced by appending a canned "Final Conclusion" section.
   *
   * Does nothing when deep research is off, research is already complete,
   * there is no response yet, or a request is in flight.
   */
  const continueResearch = async () => {
    // Evaluated against the state captured when this closure was created
    if (!deepResearch || researchComplete || !response || isLoading) return;

    // Add a small delay to allow the user to read the current response.
    // The guard above is not re-evaluated after this wait.
    await new Promise(resolve => setTimeout(resolve, 2000));

    setIsLoading(true);

    try {
      // Store the current response for use in the history
      const currentResponse = response;

      // Create a new message from the AI's previous response
      const newHistory: Message[] = [
        ...conversationHistory,
        {
          role: 'assistant',
          content: currentResponse
        },
        {
          role: 'user',
          content: '[DEEP RESEARCH] Continue the research'
        }
      ];

      // Update conversation history
      setConversationHistory(newHistory);

      // Increment research iteration
      const newIteration = researchIteration + 1;
      setResearchIteration(newIteration);

      // Clear previous response
      setResponse('');

      // Prepare the request body
      const requestBody: ChatCompletionRequest = {
        repo_url: getRepoUrl(repoInfo),
        type: repoInfo.type,
        messages: newHistory.map(msg => ({ role: msg.role as 'user' | 'assistant', content: msg.content })),
        provider: selectedProvider,
        // A custom model name, when enabled, replaces the selected model
        model: isCustomSelectedModel ? customSelectedModel : selectedModel,
        language: language
      };

      // Add tokens if available
      if (repoInfo?.token) {
        requestBody.token = repoInfo.token;
      }

      // Close any existing WebSocket connection
      closeWebSocket(webSocketRef.current);

      // Accumulates the streamed text; shared by the message and close handlers
      let fullResponse = '';

      // Create a new WebSocket connection
      webSocketRef.current = createChatWebSocket(
        requestBody,
        // Message handler
        (message: string) => {
          fullResponse += message;
          setResponse(fullResponse);

          // Extract research stage if this is a deep research response
          if (deepResearch) {
            const stage = extractResearchStage(fullResponse, newIteration);
            if (stage) {
              // Add the stage to the research stages if it's not already there
              setResearchStages(prev => {
                // Check if we already have this stage
                const existingStageIndex = prev.findIndex(s => s.iteration === stage.iteration && s.type === stage.type);
                if (existingStageIndex >= 0) {
                  // Update existing stage
                  const newStages = [...prev];
                  newStages[existingStageIndex] = stage;
                  return newStages;
                } else {
                  // Add new stage
                  return [...prev, stage];
                }
              });

              // Update current stage index to the latest stage.
              // NOTE(review): researchStages here is the array captured by this
              // closure, so its length is the position a newly appended stage
              // would take — confirm this holds when an existing stage is updated.
              setCurrentStageIndex(researchStages.length);
            }
          }
        },
        // Error handler
        (error: Event) => {
          console.error('WebSocket error:', error);
          setResponse(prev => prev + '\n\nError: WebSocket connection failed. Falling back to HTTP...');

          // Fallback to HTTP if WebSocket fails.
          // NOTE(review): fallbackToHttp reads the captured researchIteration,
          // not newIteration — confirm that is intended. Also confirm whether
          // createChatWebSocket invokes the close handler after an error; if so
          // it would clear isLoading while this fallback is still streaming.
          fallbackToHttp(requestBody);
        },
        // Close handler
        () => {
          // Check if research is complete when the WebSocket closes
          const isComplete = checkIfResearchComplete(fullResponse);

          // Force completion after a maximum number of iterations (5)
          const forceComplete = newIteration >= 5;

          if (forceComplete && !isComplete) {
            // If we're forcing completion, append a comprehensive conclusion to the response
            const completionNote = "\n\n## Final Conclusion\nAfter multiple iterations of deep research, we've gathered significant insights about this topic. This concludes our investigation process, having reached the maximum number of research iterations. The findings presented across all iterations collectively form our comprehensive answer to the original question.";
            fullResponse += completionNote;
            setResponse(fullResponse);
            setResearchComplete(true);
          } else {
            setResearchComplete(isComplete);
          }

          setIsLoading(false);
        }
      );
    } catch (error) {
      // A synchronous failure while preparing or opening the request ends the research run
      console.error('Error during API call:', error);
      setResponse(prev => prev + '\n\nError: Failed to continue research. Please try again.');
      setResearchComplete(true);
      setIsLoading(false);
    }
  };

  /**
   * HTTP fallback used when the WebSocket connection fails.
   *
   * POSTs the same request body to `/api/chat/stream`, reads the streamed
   * body chunk by chunk into the displayed response, and then applies the
   * same completion logic as the WebSocket close handler. Any failure is
   * reported in the response text and ends the research run. The loading
   * flag is always cleared at the end.
   *
   * NOTE(review): researchIteration below is the value captured by this
   * closure; when called from continueResearch it appears to lag the
   * newIteration computed there — confirm.
   *
   * @param requestBody - the chat request that was attempted over WebSocket
   */
  const fallbackToHttp = async (requestBody: ChatCompletionRequest) => {
    try {
      // Make the API call using HTTP
      const apiResponse = await fetch(`/api/chat/stream`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: JSON.stringify(requestBody)
      });

      if (!apiResponse.ok) {
        throw new Error(`API error: ${apiResponse.status}`);
      }

      // Process the streaming response
      const reader = apiResponse.body?.getReader();
      const decoder = new TextDecoder();

      if (!reader) {
        throw new Error('Failed to get response reader');
      }

      // Read the stream
      let fullResponse = '';
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;

        // stream: true lets multi-byte characters span chunk boundaries
        const chunk = decoder.decode(value, { stream: true });
        fullResponse += chunk;
        // Replaces the displayed response with the text streamed so far
        setResponse(fullResponse);

        // Extract research stage if this is a deep research response
        if (deepResearch) {
          const stage = extractResearchStage(fullResponse, researchIteration);
          if (stage) {
            // Add the stage to the research stages, or refresh it if already present
            setResearchStages(prev => {
              const existingStageIndex = prev.findIndex(s => s.iteration === stage.iteration && s.type === stage.type);
              if (existingStageIndex >= 0) {
                const newStages = [...prev];
                newStages[existingStageIndex] = stage;
                return newStages;
              } else {
                return [...prev, stage];
              }
            });
          }
        }
      }

      // Check if research is complete
      const isComplete = checkIfResearchComplete(fullResponse);

      // Force completion after a maximum number of iterations (5)
      const forceComplete = researchIteration >= 5;

      if (forceComplete && !isComplete) {
        // If we're forcing completion, append a comprehensive conclusion to the response
        const completionNote = "\n\n## Final Conclusion\nAfter multiple iterations of deep research, we've gathered significant insights about this topic. This concludes our investigation process, having reached the maximum number of research iterations. The findings presented across all iterations collectively form our comprehensive answer to the original question.";
        fullResponse += completionNote;
        setResponse(fullResponse);
        setResearchComplete(true);
      } else {
        setResearchComplete(isComplete);
      }
    } catch (error) {
      // Marking research complete here stops any automatic continuation
      console.error('Error during HTTP fallback:', error);
      setResponse(prev => prev + '\n\nError: Failed to get a response. Please try again.');
      setResearchComplete(true);
    } finally {
      setIsLoading(false);
    }
  };

  // Effect to continue research when response is updated.
  // Runs only while deep research is on, a response exists, nothing is loading
  // and research has not been marked complete.
  useEffect(() => {
    if (deepResearch && response && !isLoading && !researchComplete) {
      const isComplete = checkIfResearchComplete(response);
      if (isComplete) {
        setResearchComplete(true);
      } else if (researchIteration > 0 && researchIteration < 5) {
        // Only auto-continue if we're already in a research process and haven't reached max iterations
        // Use setTimeout to avoid potential infinite loops
        // (continueResearch adds its own 2 s wait on top of this 1 s timer)
        const timer = setTimeout(() => {
          continueResearch();
        }, 1000);
        // Cancel the pending continuation if a dependency changes first
        return () => clearTimeout(timer);
      }
    }
    // continueResearch is deliberately left out of the dependency list
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [response, isLoading, deepResearch, researchComplete, researchIteration]);

  // Effect to update research stages when the response changes.
  // Only acts once loading has finished, so it sees the settled response text.
  useEffect(() => {
    if (deepResearch && response && !isLoading) {
      // Try to extract a research stage from the response
      const stage = extractResearchStage(response, researchIteration);
      if (stage) {
        // Add or update the stage in the research stages
        setResearchStages(prev => {
          // Check if we already have this stage (same iteration and type)
          const existingStageIndex = prev.findIndex(s => s.iteration === stage.iteration && s.type === stage.type);
          if (existingStageIndex >= 0) {
            // Update existing stage
            const newStages = [...prev];
            newStages[existingStageIndex] = stage;
            return newStages;
          } else {
            // Add new stage
            return [...prev, stage];
          }
        });

        // Update current stage index to point to this stage.
        // NOTE(review): researchStages here is the array captured by this effect,
        // i.e. before the update above is applied; a stage appended just now is
        // not found and the index is left unchanged — confirm that is intended.
        setCurrentStageIndex(prev => {
          const newIndex = researchStages.findIndex(s => s.iteration === stage.iteration && s.type === stage.type);
          return newIndex >= 0 ? newIndex : prev;
        });
      }
    }

    // researchStages and extractResearchStage are deliberately left out of the dependency list
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [response, isLoading, deepResearch, researchIteration]);

  /**
   * Form submit handler: suppresses the browser's default submission and
   * starts a new request, unless the question is blank or a request is
   * already in flight.
   */
  const handleSubmit = async (e: React.FormEvent) => {
    e.preventDefault();

    const hasQuestion = question.trim().length > 0;
    if (hasQuestion && !isLoading) {
      handleConfirmAsk();
    }
  };

  // Start a fresh conversation for the current question.
  //
  // Resets the per-request state, builds the chat request from the current
  // repo/model/language selection, replaces any open WebSocket with a new one
  // and streams the answer into `response`. When deep research is enabled the
  // question is prefixed with "[DEEP RESEARCH]" and the streamed text is also
  // inspected for a research stage and for completion.
  const handleConfirmAsk = async () => {
    // Reset state left over from a previous request
    setIsLoading(true);
    setResponse('');
    setResearchIteration(0);
    setResearchComplete(false);

    try {
      // The conversation starts with a single user message
      const userContent = deepResearch ? `[DEEP RESEARCH] ${question}` : question;
      const firstMessage: Message = { role: 'user', content: userContent };
      const history: Message[] = [firstMessage];
      setConversationHistory(history);

      // Assemble the request payload
      const requestBody: ChatCompletionRequest = {
        repo_url: getRepoUrl(repoInfo),
        type: repoInfo.type,
        messages: history.map(({ role, content }) => ({ role: role as 'user' | 'assistant', content })),
        provider: selectedProvider,
        model: isCustomSelectedModel ? customSelectedModel : selectedModel,
        language: language
      };

      // Only attach a token when the repo info carries one
      if (repoInfo?.token) {
        requestBody.token = repoInfo.token;
      }

      // Drop whatever connection is still around before opening a new one
      closeWebSocket(webSocketRef.current);

      // Text received so far on this connection
      let accumulated = '';

      // Called for every streamed chunk
      const onChunk = (message: string) => {
        accumulated += message;
        setResponse(accumulated);

        if (!deepResearch) return;

        // This first request is treated as iteration 1 for stage extraction
        const stage = extractResearchStage(accumulated, 1);
        if (stage) {
          setResearchStages([stage]);
          setCurrentStageIndex(0);
        }
      };

      // Called when the socket reports an error: tell the user and retry over HTTP
      const onSocketError = (error: Event) => {
        console.error('WebSocket error:', error);
        setResponse(prev => prev + '\n\nError: WebSocket connection failed. Falling back to HTTP...');

        fallbackToHttp(requestBody);
      };

      // Called when the socket closes
      const onSocketClose = () => {
        if (deepResearch) {
          const isComplete = checkIfResearchComplete(accumulated);
          setResearchComplete(isComplete);

          // Moving the iteration counter to 1 is what lets the research
          // continue; the follow-up request is issued by a useEffect elsewhere.
          if (!isComplete) {
            setResearchIteration(1);
          }
        }

        setIsLoading(false);
      };

      webSocketRef.current = createChatWebSocket(
        requestBody,
        onChunk,
        onSocketError,
        onSocketClose
      );
    } catch (error) {
      console.error('Error during API call:', error);
      setResponse(prev => prev + '\n\nError: Failed to get a response. Please try again.');
      setResearchComplete(true);
      setIsLoading(false);
    }
  };

  // Rendered width of the submit button; the question input uses it to
  // reserve right padding so typed text does not run underneath the button.
  const [buttonWidth, setButtonWidth] = useState(0);
  const buttonRef = useRef<HTMLButtonElement>(null);

  // Re-measure whenever the button's content can change: its label text or
  // the loading state (spinner vs. label).
  useEffect(() => {
    const buttonEl = buttonRef.current;
    if (!buttonEl) return;

    setButtonWidth(buttonEl.offsetWidth);
  }, [messages.ask?.askButton, isLoading]);

  return (
    <div>
      <div className="p-4">
        <div className="flex items-center justify-end mb-4">
          {/* Model selection button */}
          <button
            type="button"
            onClick={() => setIsModelSelectionModalOpen(true)}
            className="text-xs px-2.5 py-1 rounded border border-[var(--border-color)]/40 bg-[var(--background)]/10 text-[var(--foreground)]/80 hover:bg-[var(--background)]/30 hover:text-[var(--foreground)] transition-colors flex items-center gap-1.5"
          >
            <span>{selectedProvider}/{isCustomSelectedModel ? customSelectedModel : selectedModel}</span>
            <svg className="h-3.5 w-3.5 text-[var(--accent-primary)]/70" fill="none" viewBox="0 0 24 24" stroke="currentColor">
              <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={1.5} d="M11 5H6a2 2 0 00-2 2v11a2 2 0 002 2h11a2 2 0 002-2v-5m-1.414-9.414a2 2 0 112.828 2.828L11.828 15H9v-2.828l8.586-8.586z" />
            </svg>
          </button>
        </div>

        {/* Question input */}
        <form onSubmit={handleSubmit} className="mt-4">
          <div className="relative">
            <input
              ref={inputRef}
              type="text"
              value={question}
              onChange={(e) => setQuestion(e.target.value)}
              placeholder={messages.ask?.placeholder || 'What would you like to know about this codebase?'}
              className="block w-full rounded-md border border-[var(--border-color)] bg-[var(--input-bg)] text-[var(--foreground)] px-5 py-3.5 text-base shadow-sm focus:border-[var(--accent-primary)] focus:ring-2 focus:ring-[var(--accent-primary)]/30 focus:outline-none transition-all"
              style={{ paddingRight: `${buttonWidth + 24}px` }}
              disabled={isLoading}
            />
            <button
              ref={buttonRef}
              type="submit"
              disabled={isLoading || !question.trim()}
              className={`absolute right-3 top-1/2 transform -translate-y-1/2 px-4 py-2 rounded-md font-medium text-sm ${
                isLoading || !question.trim()
                  ? 'bg-[var(--button-disabled-bg)] text-[var(--button-disabled-text)] cursor-not-allowed'
                  : 'bg-[var(--accent-primary)] text-white hover:bg-[var(--accent-primary)]/90 shadow-sm'
              } transition-all duration-200 flex items-center gap-1.5`}
            >
              {isLoading ? (
                <div className="w-4 h-4 rounded-full border-2 border-t-transparent border-white animate-spin" />
              ) : (
                <>
                  <svg className="w-4 h-4" fill="none" viewBox="0 0 24 24" stroke="currentColor">
                    <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M13 5l7 7-7 7M5 5l7 7-7 7" />
                  </svg>
                  <span>{messages.ask?.askButton || 'Ask'}</span>
                </>
              )}
            </button>
          </div>

          {/* Deep Research toggle */}
          <div className="flex items-center mt-2 justify-between">
            <div className="group relative">
              <label className="flex items-center cursor-pointer">
                <span className="text-xs text-gray-600 dark:text-gray-400 mr-2">Deep Research</span>
                <div className="relative">
                  <input
                    type="checkbox"
                    checked={deepResearch}
                    onChange={() => setDeepResearch(!deepResearch)}
                    className="sr-only"
                  />
                  <div className={`w-10 h-5 rounded-full transition-colors ${deepResearch ? 'bg-purple-600' : 'bg-gray-300 dark:bg-gray-600'}`}></div>
                  <div className={`absolute left-0.5 top-0.5 w-4 h-4 rounded-full bg-white transition-transform transform ${deepResearch ? 'translate-x-5' : ''}`}></div>
                </div>
              </label>
              <div className="absolute bottom-full left-0 mb-2 hidden group-hover:block bg-gray-800 text-white text-xs rounded p-2 w-72 z-10">
                <div className="relative">
                  <div className="absolute -bottom-2 left-4 w-0 h-0 border-l-4 border-r-4 border-t-4 border-transparent border-t-gray-800"></div>
                  <p className="mb-1">Deep Research conducts a multi-turn investigation process:</p>
                  <ul className="list-disc pl-4 text-xs">
                    <li><strong>Initial Research:</strong> Creates a research plan and initial findings</li>
                    <li><strong>Iteration 1:</strong> Explores specific aspects in depth</li>
                    <li><strong>Iteration 2:</strong> Investigates remaining questions</li>
                    <li><strong>Iterations 3-4:</strong> Dives deeper into complex areas</li>
                    <li><strong>Final Conclusion:</strong> Comprehensive answer based on all iterations</li>
                  </ul>
                  <p className="mt-1 text-xs italic">The AI automatically continues research until complete (up to 5 iterations)</p>
                </div>
              </div>
            </div>
            {deepResearch && (
              <div className="text-xs text-purple-600 dark:text-purple-400">
                Multi-turn research process enabled
                {researchIteration > 0 && !researchComplete && ` (iteration ${researchIteration})`}
                {researchComplete && ` (complete)`}
              </div>
            )}
          </div>
        </form>

        {/* Response area */}
        {response && (
          <div className="border-t border-gray-200 dark:border-gray-700 mt-4">
            <div
              ref={responseRef}
              className="p-4 max-h-[500px] overflow-y-auto"
            >
              <Markdown content={response} />
            </div>

            {/* Research navigation and clear button */}
            <div className="p-2 flex justify-between items-center border-t border-gray-200 dark:border-gray-700">
              {/* Research navigation */}
              {deepResearch && researchStages.length > 1 && (
                <div className="flex items-center space-x-2">
                  <button
                    onClick={() => navigateToPreviousStage()}
                    disabled={currentStageIndex === 0}
                    className={`p-1 rounded-md ${currentStageIndex === 0 ? 'text-gray-400 dark:text-gray-600' : 'text-gray-600 dark:text-gray-400 hover:bg-gray-200 dark:hover:bg-gray-700'}`}
                    aria-label="Previous stage"
                  >
                    <FaChevronLeft size={12} />
                  </button>

                  <div className="text-xs text-gray-600 dark:text-gray-400">
                    {currentStageIndex + 1} / {researchStages.length}
                  </div>

                  <button
                    onClick={() => navigateToNextStage()}
                    disabled={currentStageIndex === researchStages.length - 1}
                    className={`p-1 rounded-md ${currentStageIndex === researchStages.length - 1 ? 'text-gray-400 dark:text-gray-600' : 'text-gray-600 dark:text-gray-400 hover:bg-gray-200 dark:hover:bg-gray-700'}`}
                    aria-label="Next stage"
                  >
                    <FaChevronRight size={12} />
                  </button>

                  <div className="text-xs text-gray-600 dark:text-gray-400 ml-2">
                    {researchStages[currentStageIndex]?.title || `Stage ${currentStageIndex + 1}`}
                  </div>
                </div>
              )}

            <div className="flex items-center space-x-2">
              {/* Download button */}
              <button
                onClick={downloadresponse}
                className="text-xs text-gray-500 dark:text-gray-400 hover:text-green-600 dark:hover:text-green-400 px-2 py-1 rounded-md hover:bg-gray-200 dark:hover:bg-gray-700 flex items-center gap-1"
                title="Download response as markdown file"
              >
                <svg className="w-3 h-3" fill="none" viewBox="0 0 24 24" stroke="currentColor">
                  <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M12 10v6m0 0l-3-3m3 3l3-3m2 8H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z" />
                </svg>
                Download
              </button>

              {/* Clear button */}
              <button
                id="ask-clear-conversation"
                onClick={clearConversation}
                className="text-xs text-gray-500 dark:text-gray-400 hover:text-purple-600 dark:hover:text-purple-400 px-2 py-1 rounded-md hover:bg-gray-200 dark:hover:bg-gray-700"
              >
                Clear conversation
              </button>
            </div>
              </div>
          </div>
        )}

        {/* Loading indicator */}
        {isLoading && !response && (
          <div className="p-4 border-t border-gray-200 dark:border-gray-700">
            <div className="flex items-center space-x-2">
              <div className="animate-pulse flex space-x-1">
                <div className="h-2 w-2 bg-purple-600 rounded-full"></div>
                <div className="h-2 w-2 bg-purple-600 rounded-full"></div>
                <div className="h-2 w-2 bg-purple-600 rounded-full"></div>
              </div>
              <span className="text-xs text-gray-500 dark:text-gray-400">
                {deepResearch
                  ? (researchIteration === 0
                    ? "Planning research approach..."
                    : `Research iteration ${researchIteration} in progress...`)
                  : "Thinking..."}
              </span>
            </div>
            {deepResearch && (
              <div className="mt-2 text-xs text-gray-500 dark:text-gray-400 pl-5">
                <div className="flex flex-col space-y-1">
                  {researchIteration === 0 && (
                    <>
                      <div className="flex items-center">
                        <div className="w-2 h-2 bg-blue-500 rounded-full mr-2"></div>
                        <span>Creating research plan...</span>
                      </div>
                      <div className="flex items-center">
                        <div className="w-2 h-2 bg-green-500 rounded-full mr-2"></div>
                        <span>Identifying key areas to investigate...</span>
                      </div>
                    </>
                  )}
                  {researchIteration === 1 && (
                    <>
                      <div className="flex items-center">
                        <div className="w-2 h-2 bg-blue-500 rounded-full mr-2"></div>
                        <span>Exploring first research area in depth...</span>
                      </div>
                      <div className="flex items-center">
                        <div className="w-2 h-2 bg-green-500 rounded-full mr-2"></div>
                        <span>Analyzing code patterns and structures...</span>
                      </div>
                    </>
                  )}
                  {researchIteration === 2 && (
                    <>
                      <div className="flex items-center">
                        <div className="w-2 h-2 bg-amber-500 rounded-full mr-2"></div>
                        <span>Investigating remaining questions...</span>
                      </div>
                      <div className="flex items-center">
                        <div className="w-2 h-2 bg-purple-500 rounded-full mr-2"></div>
                        <span>Connecting findings from previous iterations...</span>
                      </div>
                    </>
                  )}
                  {researchIteration === 3 && (
                    <>
                      <div className="flex items-center">
                        <div className="w-2 h-2 bg-indigo-500 rounded-full mr-2"></div>
                        <span>Exploring deeper connections...</span>
                      </div>
                      <div className="flex items-center">
                        <div className="w-2 h-2 bg-blue-500 rounded-full mr-2"></div>
                        <span>Analyzing complex patterns...</span>
                      </div>
                    </>
                  )}
                  {researchIteration === 4 && (
                    <>
                      <div className="flex items-center">
                        <div className="w-2 h-2 bg-teal-500 rounded-full mr-2"></div>
                        <span>Refining research conclusions...</span>
                      </div>
                      <div className="flex items-center">
                        <div className="w-2 h-2 bg-cyan-500 rounded-full mr-2"></div>
                        <span>Addressing remaining edge cases...</span>
                      </div>
                    </>
                  )}
                  {researchIteration >= 5 && (
                    <>
                      <div className="flex items-center">
                        <div className="w-2 h-2 bg-purple-500 rounded-full mr-2"></div>
                        <span>Finalizing comprehensive answer...</span>
                      </div>
                      <div className="flex items-center">
                        <div className="w-2 h-2 bg-green-500 rounded-full mr-2"></div>
                        <span>Synthesizing all research findings...</span>
                      </div>
                    </>
                  )}
                </div>
              </div>
            )}
          </div>
        )}
      </div>

      {/* Model Selection Modal */}
      <ModelSelectionModal
        isOpen={isModelSelectionModalOpen}
        onClose={() => setIsModelSelectionModalOpen(false)}
        provider={selectedProvider}
        setProvider={setSelectedProvider}
        model={selectedModel}
        setModel={setSelectedModel}
        isCustomModel={isCustomSelectedModel}
        setIsCustomModel={setIsCustomSelectedModel}
        customModel={customSelectedModel}
        setCustomModel={setCustomSelectedModel}
        isComprehensiveView={isComprehensiveView}
        setIsComprehensiveView={setIsComprehensiveView}
        showFileFilters={false}
        onApply={() => {
          console.log('Model selection applied:', selectedProvider, selectedModel);
        }}
        showWikiType={false}
        authRequired={false}
        isAuthLoading={false}
      />
    </div>
  );
};

export default Ask;


================================================
FILE: src/components/ConfigurationModal.tsx
================================================
'use client';

import React, { useState } from 'react';
import { useLanguage } from '@/contexts/LanguageContext';
import UserSelector from './UserSelector';
import TokenInput from './TokenInput';

/** Git hosting platforms selectable in the access-token section. */
type RepositoryPlatform = 'github' | 'gitlab' | 'bitbucket';

/**
 * Props for ConfigurationModal. Every value is owned by the parent; the modal
 * only displays it and reports changes through the matching setter.
 */
interface ConfigurationModalProps {
  /** When false the modal renders nothing. */
  isOpen: boolean;
  /** Invoked by both the header close button and the footer cancel button. */
  onClose: () => void;

  /** Repository text displayed read-only at the top of the modal body. */
  repositoryInput: string;

  /** Currently selected key of `supportedLanguages`. */
  selectedLanguage: string;
  setSelectedLanguage: (value: string) => void;
  /** Option value -> display label pairs for the language dropdown. */
  supportedLanguages: Record<string, string>;

  /** true selects the "Comprehensive" wiki type, false selects "Concise". */
  isComprehensiveView: boolean;
  setIsComprehensiveView: (value: boolean) => void;

  // Model selection — forwarded unchanged to UserSelector
  provider: string;
  setProvider: (value: string) => void;
  model: string;
  setModel: (value: string) => void;
  isCustomModel: boolean;
  setIsCustomModel: (value: boolean) => void;
  customModel: string;
  setCustomModel: (value: string) => void;

  // Platform and access token — forwarded unchanged to TokenInput
  selectedPlatform: RepositoryPlatform;
  setSelectedPlatform: (value: RepositoryPlatform) => void;
  accessToken: string;
  setAccessToken: (value: string) => void;

  // File filter options — forwarded unchanged to UserSelector
  excludedDirs: string;
  setExcludedDirs: (value: string) => void;
  excludedFiles: string;
  setExcludedFiles: (value: string) => void;
  includedDirs: string;
  setIncludedDirs: (value: string) => void;
  includedFiles: string;
  setIncludedFiles: (value: string) => void;

  /** Invoked when the primary footer button is clicked. */
  onSubmit: () => void;
  /** Disables the primary footer button and switches its label to the processing text. */
  isSubmitting: boolean;

  /** When true (and auth status is not loading) the authorization-code field is shown. */
  authRequired?: boolean;
  /** Current value of the authorization-code field. */
  authCode?: string;
  setAuthCode?: (code: string) => void;
  /** While true a loading notice is shown in place of the authorization-code field. */
  isAuthLoading?: boolean;
}

/**
 * Modal for configuring a wiki before it is generated.
 *
 * Shows the repository read-only, then lets the user choose the wiki language,
 * the wiki type (comprehensive / concise), the model and file filters (via
 * UserSelector), the platform and access token (via TokenInput) and, when
 * `authRequired` is set, an authorization code. Returns null while `isOpen`
 * is false.
 *
 * Accessibility: the panel is exposed as a modal dialog named by its heading,
 * the icon-only close button carries an accessible name, the wiki-type toggle
 * buttons report their pressed state, and decorative icons are hidden from
 * assistive technology.
 */
export default function ConfigurationModal({
  isOpen,
  onClose,
  repositoryInput,
  selectedLanguage,
  setSelectedLanguage,
  supportedLanguages,
  isComprehensiveView,
  setIsComprehensiveView,
  provider,
  setProvider,
  model,
  setModel,
  isCustomModel,
  setIsCustomModel,
  customModel,
  setCustomModel,
  selectedPlatform,
  setSelectedPlatform,
  accessToken,
  setAccessToken,
  excludedDirs,
  setExcludedDirs,
  excludedFiles,
  setExcludedFiles,
  includedDirs,
  setIncludedDirs,
  includedFiles,
  setIncludedFiles,
  onSubmit,
  isSubmitting,
  authRequired,
  authCode,
  setAuthCode,
  isAuthLoading
}: ConfigurationModalProps) {
  const { messages: t } = useLanguage();

  // Whether the access-token section inside TokenInput is expanded.
  // Declared before the early return so the hook order never changes.
  const [showTokenSection, setShowTokenSection] = useState(false);

  if (!isOpen) return null;

  // Id linking the dialog container to its visible heading (aria-labelledby).
  const titleId = 'configuration-modal-title';

  // Shared classes for the two wiki-type buttons; only the colours depend on
  // whether the button represents the currently selected type.
  const wikiTypeButtonClass = (isActive: boolean) =>
    `flex-1 flex items-center justify-between p-2 rounded-md border transition-colors ${
      isActive
        ? 'bg-[var(--accent-primary)]/10 border-[var(--accent-primary)]/30 text-[var(--accent-primary)]'
        : 'bg-[var(--background)]/50 border-[var(--border-color)] text-[var(--foreground)] hover:bg-[var(--background)]'
    }`;

  return (
    <div className="fixed inset-0 z-50 overflow-y-auto">
      <div className="flex min-h-screen items-center justify-center p-4 text-center bg-black/50">
        <div
          role="dialog"
          aria-modal="true"
          aria-labelledby={titleId}
          className="relative transform overflow-hidden rounded-lg bg-[var(--card-bg)] text-left shadow-xl transition-all sm:my-8 sm:max-w-2xl sm:w-full"
        >
          {/* Modal header with close button */}
          <div className="flex items-center justify-between px-6 py-4 border-b border-[var(--border-color)]">
            <h3 id={titleId} className="text-lg font-medium text-[var(--accent-primary)]">
              <span className="text-[var(--accent-primary)]">{t.form?.configureWiki || 'Configure Wiki'}</span>
            </h3>
            <button
              type="button"
              onClick={onClose}
              aria-label={t.common?.close || 'Close'}
              className="text-[var(--muted)] hover:text-[var(--foreground)] focus:outline-none transition-colors"
            >
              <svg className="h-5 w-5" fill="none" viewBox="0 0 24 24" stroke="currentColor" aria-hidden="true">
                <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" />
              </svg>
            </button>
          </div>

          {/* Modal body */}
          <div className="p-6 max-h-[70vh] overflow-y-auto">
            {/* Repository info */}
            <div className="mb-4">
              <label className="block text-sm font-medium text-[var(--foreground)] mb-2">
                {t.form?.repository || 'Repository'}
              </label>
              <div className="bg-[var(--background)]/70 p-3 rounded-md border border-[var(--border-color)] text-sm text-[var(--foreground)]">
                {repositoryInput}
              </div>
            </div>

            {/* Language selection */}
            <div className="mb-4">
              <label htmlFor="language-select" className="block text-sm font-medium text-[var(--foreground)] mb-2">
                {t.form?.wikiLanguage || 'Wiki Language'}
              </label>
              <select
                id="language-select"
                value={selectedLanguage}
                onChange={(e) => setSelectedLanguage(e.target.value)}
                className="input-japanese block w-full px-3 py-2 text-sm rounded-md bg-transparent text-[var(--foreground)] focus:outline-none focus:border-[var(--accent-primary)]"
              >
                {
                  Object.entries(supportedLanguages).map(([key, value])=> <option key={key} value={key}>{value}</option>)
                }
              </select>
            </div>

            {/* Wiki Type Selector - more compact version */}
            <div className="mb-4">
              <label className="block text-sm font-medium text-[var(--foreground)] mb-2">
                {t.form?.wikiType || 'Wiki Type'}
              </label>
              <div className="flex gap-3">
                <button
                  type="button"
                  onClick={() => setIsComprehensiveView(true)}
                  aria-pressed={isComprehensiveView}
                  className={wikiTypeButtonClass(isComprehensiveView)}
                >
                  <div className="flex items-center">
                    <div className="text-left">
                      <div className="font-medium text-sm">{t.form?.comprehensive || 'Comprehensive'}</div>
                      <div className="text-xs opacity-80">
                        {t.form?.comprehensiveDescription || 'Detailed wiki with structured sections'}
                      </div>
                    </div>
                  </div>
                  {isComprehensiveView && (
                    <div className="ml-2 h-4 w-4 rounded-full bg-[var(--accent-primary)]/20 flex items-center justify-center">
                      <div className="h-2 w-2 rounded-full bg-[var(--accent-primary)]"></div>
                    </div>
                  )}
                </button>

                <button
                  type="button"
                  onClick={() => setIsComprehensiveView(false)}
                  aria-pressed={!isComprehensiveView}
                  className={wikiTypeButtonClass(!isComprehensiveView)}
                >
                  <div className="flex items-center">
                    <div className="text-left">
                      <div className="font-medium text-sm">{t.form?.concise || 'Concise'}</div>
                      <div className="text-xs opacity-80">
                        {t.form?.conciseDescription || 'Simplified wiki with fewer pages'}
                      </div>
                    </div>
                  </div>
                  {!isComprehensiveView && (
                    <div className="ml-2 h-4 w-4 rounded-full bg-[var(--accent-primary)]/20 flex items-center justify-center">
                      <div className="h-2 w-2 rounded-full bg-[var(--accent-primary)]"></div>
                    </div>
                  )}
                </button>
              </div>
            </div>

            {/* Model Selector */}
            <div className="mb-4">
              <UserSelector
                provider={provider}
                setProvider={setProvider}
                model={model}
                setModel={setModel}
                isCustomModel={isCustomModel}
                setIsCustomModel={setIsCustomModel}
                customModel={customModel}
                setCustomModel={setCustomModel}
                showFileFilters={true}
                excludedDirs={excludedDirs}
                setExcludedDirs={setExcludedDirs}
                excludedFiles={excludedFiles}
                setExcludedFiles={setExcludedFiles}
                includedDirs={includedDirs}
                setIncludedDirs={setIncludedDirs}
                includedFiles={includedFiles}
                setIncludedFiles={setIncludedFiles}
              />
            </div>

            {/* Access token section using TokenInput component */}
            <TokenInput
              selectedPlatform={selectedPlatform}
              setSelectedPlatform={setSelectedPlatform}
              accessToken={accessToken}
              setAccessToken={setAccessToken}
              showTokenSection={showTokenSection}
              onToggleTokenSection={() => setShowTokenSection(prev => !prev)}
              allowPlatformChange={true}
            />

            {/* Authorization Code Input */}
            {isAuthLoading && (
              <div className="mb-4 p-3 bg-[var(--background)]/50 rounded-md border border-[var(--border-color)] text-sm text-[var(--muted)]">
                Loading authentication status...
              </div>
            )}
            {!isAuthLoading && authRequired && (
              <div className="mb-4 p-4 bg-[var(--background)]/50 rounded-md border border-[var(--border-color)]">
                <label htmlFor="authCode" className="block text-sm font-medium text-[var(--foreground)] mb-2">
                  {t.form?.authorizationCode || 'Authorization Code'}
                </label>
                <input
                  type="password"
                  id="authCode"
                  value={authCode || ''}
                  onChange={(e) => setAuthCode?.(e.target.value)}
                  className="input-japanese block w-full px-3 py-2 text-sm rounded-md bg-transparent text-[var(--foreground)] focus:outline-none focus:border-[var(--accent-primary)]"
                  placeholder="Enter your authorization code"
                />
                <div className="flex items-center mt-2 text-xs text-[var(--muted)]">
                  <svg xmlns="http://www.w3.org/2000/svg" className="h-4 w-4 mr-1 text-[var(--muted)]"
                    fill="none" viewBox="0 0 24 24" stroke="currentColor" aria-hidden="true">
                    <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2}
                      d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
                  </svg>
                  {t.form?.authorizationRequired || 'Authentication is required to generate the wiki.'}
                </div>
              </div>
            )}
          </div>

          {/* Modal footer */}
          <div className="flex items-center justify-end gap-2 px-6 py-4 border-t border-[var(--border-color)]">
            <button
              type="button"
              onClick={onClose}
              className="px-4 py-2 text-sm font-medium rounded-md border border-[var(--border-color)]/50 text-[var(--muted)] bg-transparent hover:bg-[var(--background)] hover:text-[var(--foreground)] transition-colors"
            >
              {t.common?.cancel || 'Cancel'}
            </button>
            <button
              type="button"
              onClick={onSubmit}
              disabled={isSubmitting}
              className="px-4 py-2 text-sm font-medium rounded-md border border-transparent bg-[var(--accent-primary)]/90 text-white hover:bg-[var(--accent-primary)] transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
            >
              {isSubmitting ? (t.common?.processing || 'Processing...') : (t.common?.generateWiki || 'Generate Wiki')}
            </button>
          </div>
        </div>
      </div>
    </div>
  );
}


================================================
FILE: src/components/Markdown.tsx
================================================
import React from 'react';
import ReactMarkdown from 'react-markdown';
import remarkGfm from 'remark-gfm';
import rehypeRaw from 'rehype-raw';
import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter';
import { tomorrow } from 'react-syntax-highlighter/dist/cjs/styles/prism';
import Mermaid from './Mermaid';

/** Props for the Markdown component. */
interface MarkdownProps {
  // Source text to render
  content: string;
}

/**
 * Renders a markdown string as styled React elements.
 *
 * Parsing is delegated to ReactMarkdown with the remark-gfm and rehype-raw plugins.
 * This component only supplies element overrides that attach Tailwind classes, plus
 * three special cases inside `code`:
 *   - fenced blocks tagged `mermaid` are rendered through the Mermaid component,
 *   - other fenced blocks with a language tag get a header bar, a copy button and
 *     syntax highlighting,
 *   - everything else is rendered as inline code.
 *
 * @param content Markdown source text to render.
 */
const Markdown: React.FC<MarkdownProps> = ({ content }) => {
  // Keyword -> highlight classes for ReAct-style headings. Order matters: when a heading
  // contains several keywords, the first matching entry decides the color.
  const reactHeadingStyles: Array<[string, string]> = [
    ['Thought', 'bg-blue-100 dark:bg-blue-900/30 text-blue-800 dark:text-blue-300'],
    ['Action', 'bg-green-100 dark:bg-green-900/30 text-green-800 dark:text-green-300'],
    ['Observation', 'bg-amber-100 dark:bg-amber-900/30 text-amber-800 dark:text-amber-300'],
    ['Answer', 'bg-purple-100 dark:bg-purple-900/30 text-purple-800 dark:text-purple-300'],
  ];

  // Define markdown components
  const MarkdownComponents: React.ComponentProps<typeof ReactMarkdown>['components'] = {
    p({ children, ...props }: { children?: React.ReactNode }) {
      return <p className="mb-3 text-sm leading-relaxed dark:text-white" {...props}>{children}</p>;
    },
    h1({ children, ...props }: { children?: React.ReactNode }) {
      return <h1 className="text-xl font-bold mt-6 mb-3 dark:text-white" {...props}>{children}</h1>;
    },
    h2({ children, ...props }: { children?: React.ReactNode }) {
      // Special styling for ReAct headings; only applies when the heading is a plain string
      if (typeof children === 'string') {
        const text: string = children;
        const reactStyle = reactHeadingStyles.find(([keyword]) => text.includes(keyword));
        if (reactStyle) {
          return (
            <h2
              className={`text-base font-bold mt-5 mb-3 p-2 rounded ${reactStyle[1]}`}
              {...props}
            >
              {children}
            </h2>
          );
        }
      }
      return <h2 className="text-lg font-bold mt-5 mb-3 dark:text-white" {...props}>{children}</h2>;
    },
    h3({ children, ...props }: { children?: React.ReactNode }) {
      return <h3 className="text-base font-semibold mt-4 mb-2 dark:text-white" {...props}>{children}</h3>;
    },
    h4({ children, ...props }: { children?: React.ReactNode }) {
      return <h4 className="text-sm font-semibold mt-3 mb-2 dark:text-white" {...props}>{children}</h4>;
    },
    ul({ children, ...props }: { children?: React.ReactNode }) {
      return <ul className="list-disc pl-6 mb-4 text-sm dark:text-white space-y-2" {...props}>{children}</ul>;
    },
    ol({ children, ...props }: { children?: React.ReactNode }) {
      return <ol className="list-decimal pl-6 mb-4 text-sm dark:text-white space-y-2" {...props}>{children}</ol>;
    },
    li({ children, ...props }: { children?: React.ReactNode }) {
      return <li className="mb-2 text-sm leading-relaxed dark:text-white" {...props}>{children}</li>;
    },
    a({ children, href, ...props }: { children?: React.ReactNode; href?: string }) {
      // Links always open in a new tab; rel guards the opener reference
      return (
        <a
          href={href}
          className="text-purple-600 dark:text-purple-400 hover:underline font-medium"
          target="_blank"
          rel="noopener noreferrer"
          {...props}
        >
          {children}
        </a>
      );
    },
    blockquote({ children, ...props }: { children?: React.ReactNode }) {
      return (
        <blockquote
          className="border-l-4 border-gray-300 dark:border-gray-700 pl-4 py-1 text-gray-700 dark:text-gray-300 italic my-4 text-sm"
          {...props}
        >
          {children}
        </blockquote>
      );
    },
    table({ children, ...props }: { children?: React.ReactNode }) {
      // Wrapper div lets wide tables scroll horizontally instead of overflowing the page
      return (
        <div className="overflow-x-auto my-6 rounded-md">
          <table className="min-w-full text-sm border-collapse" {...props}>
            {children}
          </table>
        </div>
      );
    },
    thead({ children, ...props }: { children?: React.ReactNode }) {
      return <thead className="bg-gray-100 dark:bg-gray-800" {...props}>{children}</thead>;
    },
    tbody({ children, ...props }: { children?: React.ReactNode }) {
      return <tbody className="divide-y divide-gray-200 dark:divide-gray-700" {...props}>{children}</tbody>;
    },
    tr({ children, ...props }: { children?: React.ReactNode }) {
      return <tr className="hover:bg-gray-50 dark:hover:bg-gray-900" {...props}>{children}</tr>;
    },
    th({ children, ...props }: { children?: React.ReactNode }) {
      return (
        <th
          className="px-4 py-3 text-left font-medium text-gray-700 dark:text-gray-300"
          {...props}
        >
          {children}
        </th>
      );
    },
    td({ children, ...props }: { children?: React.ReactNode }) {
      return <td className="px-4 py-3 border-t border-gray-200 dark:border-gray-700" {...props}>{children}</td>;
    },
    code(props: {
      inline?: boolean;
      className?: string;
      children?: React.ReactNode;
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      [key: string]: any; // Using any here as it's required for ReactMarkdown components
    }) {
      const { inline, className, children, ...otherProps } = props;
      // The fence language arrives as a `language-xxx` class name
      const match = /language-(\w+)/.exec(className || '');
      // Drop the single trailing newline so the block does not end with an empty line
      const codeContent = children ? String(children).replace(/\n$/, '') : '';

      // Handle Mermaid diagrams
      if (!inline && match && match[1] === 'mermaid') {
        return (
          <div className="my-8 bg-gray-50 dark:bg-gray-800 rounded-md overflow-hidden shadow-sm">
            <Mermaid
              chart={codeContent}
              className="w-full max-w-full"
              zoomingEnabled={true}
            />
          </div>
        );
      }

      // Handle code blocks
      if (!inline && match) {
        const copyCode = () => {
          // navigator.clipboard is undefined outside secure contexts (e.g. plain http),
          // and writeText() can reject; neither case should throw out of the click handler.
          navigator.clipboard?.writeText(codeContent).catch((err) => {
            console.error('Failed to copy code:', err);
          });
        };

        return (
          <div className="my-6 rounded-md overflow-hidden text-sm shadow-sm">
            <div className="bg-gray-800 text-gray-200 px-5 py-2 text-sm flex justify-between items-center">
              <span>{match[1]}</span>
              <button
                type="button"
                onClick={copyCode}
                className="text-gray-400 hover:text-white"
                title="Copy code"
                aria-label="Copy code"
              >
                <svg
                  xmlns="http://www.w3.org/2000/svg"
                  className="h-5 w-5"
                  fill="none"
                  viewBox="0 0 24 24"
                  stroke="currentColor"
                >
                  <path
                    strokeLinecap="round"
                    strokeLinejoin="round"
                    strokeWidth={2}
                    d="M8 16H6a2 2 0 01-2-2V6a2 2 0 012-2h8a2 2 0 012 2v2m-6 12h8a2 2 0 002-2v-8a2 2 0 00-2-2h-8a2 2 0 00-2 2v8a2 2 0 002 2z"
                  />
                </svg>
              </button>
            </div>
            <SyntaxHighlighter
              language={match[1]}
              style={tomorrow}
              className="!text-sm"
              customStyle={{ margin: 0, borderRadius: '0 0 0.375rem 0.375rem', padding: '1rem' }}
              showLineNumbers={true}
              wrapLines={true}
              wrapLongLines={true}
              {...otherProps}
            >
              {codeContent}
            </SyntaxHighlighter>
          </div>
        );
      }

      // Handle inline code (also reached by fenced blocks that carry no language tag).
      // filter(Boolean) keeps a missing className from rendering as the literal class "undefined".
      const inlineClassName = [
        className,
        'font-mono bg-gray-100 dark:bg-gray-800 px-2 py-0.5 rounded text-pink-500 dark:text-pink-400 text-sm',
      ].filter(Boolean).join(' ');

      return (
        <code className={inlineClassName} {...otherProps}>
          {children}
        </code>
      );
    },
  };

  // NOTE(review): rehypeRaw passes raw HTML embedded in `content` through to the output and no
  // sanitizing plugin is configured here — confirm `content` is trusted or add sanitization.
  return (
    <div className="prose prose-base dark:prose-invert max-w-none px-2 py-4">
      <ReactMarkdown
        remarkPlugins={[remarkGfm]}
        rehypePlugins={[rehypeRaw]}
        components={MarkdownComponents}
      >
        {content}
      </ReactMarkdown>
    </div>
  );
};

export default Markdown;

================================================
FILE: src/components/Mermaid.tsx
================================================
import React, { useEffect, useRef, useState } from 'react';
import mermaid from 'mermaid';
// We'll use dynamic import for svg-pan-zoom

// Module-level, one-time Mermaid configuration shared by every diagram this file renders.
// The values below set the base theme, layout spacing and a custom stylesheet ("Japanese
// aesthetic" palette) that is injected into each generated SVG via `themeCSS`.
mermaid.initialize({
  startOnLoad: true,
  theme: 'neutral',
  // NOTE(review): per the Mermaid docs 'loose' permits HTML in labels and click handlers;
  // the resulting SVG is later inserted with dangerouslySetInnerHTML — confirm chart text is trusted.
  securityLevel: 'loose',
  // Rendering failures are handled by the Mermaid component's own error UI instead
  suppressErrorRendering: true,
  logLevel: 'error',
  maxTextSize: 100000, // Increase text size limit
  htmlLabels: true,
  flowchart: {
    htmlLabels: true,
    curve: 'basis',
    nodeSpacing: 60,
    rankSpacing: 60,
    padding: 20,
  },
  // Custom stylesheet. Rules prefixed with [data-theme="dark"] only take effect when the
  // Mermaid component stamps data-theme="dark" onto the <svg> root after rendering.
  themeCSS: `
    /* Japanese aesthetic styles for all diagrams */
    .node rect, .node circle, .node ellipse, .node polygon, .node path {
      fill: #f8f4e6;
      stroke: #d7c4bb;
      stroke-width: 1px;
    }
    .edgePath .path {
      stroke: #9b7cb9;
      stroke-width: 1.5px;
    }
    .edgeLabel {
      background-color: transparent;
      color: #333333;
      p {
        background-color: transparent !important;
      }
    }
    .label {
      color: #333333;
    }
    .cluster rect {
      fill: #f8f4e6;
      stroke: #d7c4bb;
      stroke-width: 1px;
    }

    /* Sequence diagram specific styles */
    .actor {
      fill: #f8f4e6;
      stroke: #d7c4bb;
      stroke-width: 1px;
    }
    text.actor {
      fill: #333333;
      stroke: none;
    }
    .messageText {
      fill: #333333;
      stroke: none;
    }
    .messageLine0, .messageLine1 {
      stroke: #9b7cb9;
    }
    .noteText {
      fill: #333333;
    }

    /* Dark mode overrides - will be applied with data-theme="dark" */
    [data-theme="dark"] .node rect,
    [data-theme="dark"] .node circle,
    [data-theme="dark"] .node ellipse,
    [data-theme="dark"] .node polygon,
    [data-theme="dark"] .node path {
      fill: #222222;
      stroke: #5d4037;
    }
    [data-theme="dark"] .edgePath .path {
      stroke: #9370db;
    }
    [data-theme="dark"] .edgeLabel {
      background-color: transparent;
      color: #f0f0f0;
    }
    [data-theme="dark"] .label {
      color: #f0f0f0;
    }
    [data-theme="dark"] .cluster rect {
      fill: #222222;
      stroke: #5d4037;
    }
    [data-theme="dark"] .flowchart-link {
      stroke: #9370db;
    }

    /* Dark mode sequence diagram overrides */
    [data-theme="dark"] .actor {
      fill: #222222;
      stroke: #5d4037;
    }
    [data-theme="dark"] text.actor {
      fill: #f0f0f0;
      stroke: none;
    }
    [data-theme="dark"] .messageText {
      fill: #f0f0f0;
      stroke: none;
      font-weight: 500;
    }
    [data-theme="dark"] .messageLine0, [data-theme="dark"] .messageLine1 {
      stroke: #9370db;
      stroke-width: 1.5px;
    }
    [data-theme="dark"] .noteText {
      fill: #f0f0f0;
    }
    /* Additional styles for sequence diagram text */
    [data-theme="dark"] #sequenceNumber {
      fill: #f0f0f0;
    }
    [data-theme="dark"] text.sequenceText {
      fill: #f0f0f0;
      font-weight: 500;
    }
    [data-theme="dark"] text.loopText, [data-theme="dark"] text.loopText tspan {
      fill: #f0f0f0;
    }
    /* Add a subtle background to message text for better readability */
    [data-theme="dark"] .messageText, [data-theme="dark"] text.sequenceText {
      paint-order: stroke;
      stroke: #1a1a1a;
      stroke-width: 2px;
      stroke-linecap: round;
      stroke-linejoin: round;
    }

    /* Force text elements to be properly colored */
    text[text-anchor][dominant-baseline],
    text[text-anchor][alignment-baseline],
    .nodeLabel,
    .edgeLabel,
    .label,
    text {
      fill: #777 !important;
    }

    [data-theme="dark"] text[text-anchor][dominant-baseline],
    [data-theme="dark"] text[text-anchor][alignment-baseline],
    [data-theme="dark"] .nodeLabel,
    [data-theme="dark"] .edgeLabel,
    [data-theme="dark"] .label,
    [data-theme="dark"] text {
      fill: #f0f0f0 !important;
    }

    /* Add clickable element styles with subtle transitions */
    .clickable {
      transition: all 0.3s ease;
    }
    .clickable:hover {
      transform: scale(1.03);
      cursor: pointer;
    }
    .clickable:hover > * {
      filter: brightness(0.95);
    }
  `,
  // Font stack is expressed through CSS custom properties defined elsewhere in the app
  fontFamily: 'var(--font-geist-sans), var(--font-serif-jp), sans-serif',
  fontSize: 12,
});

/** Props accepted by the Mermaid diagram component. */
interface MermaidProps {
  /** Mermaid diagram definition; passed unmodified to mermaid.render(). */
  chart: string;
  /** Extra class names appended to the diagram / placeholder wrapper. Defaults to ''. */
  className?: string;
  /**
   * When true the diagram gets svg-pan-zoom controls inside a fixed-height frame;
   * when false (the default) clicking the diagram opens it in a fullscreen modal.
   */
  zoomingEnabled?: boolean;
}

/**
 * Full-screen overlay that shows its children (the rendered diagram) inside a card
 * with zoom-out / zoom-in / reset / close controls.
 *
 * - Renders nothing while `isOpen` is false.
 * - While open, pressing Escape or pressing the mouse outside the card calls `onClose`.
 * - Zoom is clamped to 0.5–2, changes in 0.1 steps, and resets to 1 each time the modal opens.
 */
const FullScreenModal: React.FC<{
  isOpen: boolean;
  onClose: () => void;
  children: React.ReactNode;
}> = ({ isOpen, onClose, children }) => {
  const modalRef = useRef<HTMLDivElement>(null);
  const [zoom, setZoom] = useState(1);

  // Dismissal listeners (Escape key + press outside the card) exist only while the modal is open
  useEffect(() => {
    if (!isOpen) return;

    const closeOnEscape = (event: KeyboardEvent) => {
      if (event.key === 'Escape') {
        onClose();
      }
    };

    const closeOnOutsidePress = (event: MouseEvent) => {
      const card = modalRef.current;
      if (card && !card.contains(event.target as Node)) {
        onClose();
      }
    };

    document.addEventListener('keydown', closeOnEscape);
    document.addEventListener('mousedown', closeOnOutsidePress);

    return () => {
      document.removeEventListener('keydown', closeOnEscape);
      document.removeEventListener('mousedown', closeOnOutsidePress);
    };
  }, [isOpen, onClose]);

  // Every opening starts back at 100%
  useEffect(() => {
    if (isOpen) {
      setZoom(1);
    }
  }, [isOpen]);

  if (!isOpen) return null;

  // All four header buttons share the same look
  const controlButtonClass = "text-[var(--foreground)] hover:bg-[var(--accent-primary)]/10 p-2 rounded-md border border-[var(--border-color)] transition-colors";

  const zoomOut = () => setZoom(Math.max(0.5, zoom - 0.1));
  const zoomIn = () => setZoom(Math.min(2, zoom + 0.1));
  const resetZoom = () => setZoom(1);

  const zoomPercent = Math.round(zoom * 100);

  return (
    <div className="fixed inset-0 z-50 flex items-center justify-center bg-black bg-opacity-75 p-4">
      <div
        ref={modalRef}
        className="bg-[var(--card-bg)] rounded-lg shadow-custom max-w-5xl max-h-[90vh] w-full overflow-hidden flex flex-col card-japanese"
      >
        {/* Header: title on the left, zoom controls and close button on the right */}
        <div className="flex items-center justify-between p-4 border-b border-[var(--border-color)]">
          <div className="font-medium text-[var(--foreground)] font-serif">図表表示</div>
          <div className="flex items-center gap-4">
            <div className="flex items-center gap-2">
              <button onClick={zoomOut} className={controlButtonClass} aria-label="Zoom out">
                <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
                  <circle cx="11" cy="11" r="8"></circle>
                  <line x1="21" y1="21" x2="16.65" y2="16.65"></line>
                  <line x1="8" y1="11" x2="14" y2="11"></line>
                </svg>
              </button>
              <span className="text-sm text-[var(--muted)]">{zoomPercent}%</span>
              <button onClick={zoomIn} className={controlButtonClass} aria-label="Zoom in">
                <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
                  <circle cx="11" cy="11" r="8"></circle>
                  <line x1="21" y1="21" x2="16.65" y2="16.65"></line>
                  <line x1="11" y1="8" x2="11" y2="14"></line>
                  <line x1="8" y1="11" x2="14" y2="11"></line>
                </svg>
              </button>
              <button onClick={resetZoom} className={controlButtonClass} aria-label="Reset zoom">
                <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
                  <path d="M21 12a9 9 0 1 1-9-9c2.52 0 4.93 1 6.74 2.74L21 8"></path>
                  <path d="M21 3v5h-5"></path>
                </svg>
              </button>
            </div>
            <button onClick={onClose} className={controlButtonClass} aria-label="Close">
              <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
                <line x1="18" y1="6" x2="6" y2="18"></line>
                <line x1="6" y1="6" x2="18" y2="18"></line>
              </svg>
            </button>
          </div>
        </div>

        {/* Body: children are scaled with a CSS transform driven by the zoom state */}
        <div className="overflow-auto p-6 flex-1 flex items-center justify-center bg-[var(--background)]/50">
          <div
            style={{
              transform: `scale(${zoom})`,
              transformOrigin: 'center center',
              transition: 'transform 0.3s ease-out'
            }}
          >
            {children}
          </div>
        </div>
      </div>
    </div>
  );
};

/**
 * Renders a Mermaid diagram from its text definition.
 *
 * States:
 *   - error:   a bordered error card that shows the offending diagram source,
 *   - loading: three pulsing dots while mermaid.render() is pending (svg is empty),
 *   - ready:   the SVG markup, either with svg-pan-zoom controls (`zoomingEnabled`)
 *              or clickable to open a fullscreen modal.
 *
 * @param chart          Mermaid definition, passed unmodified to mermaid.render().
 * @param className      Extra classes for the diagram / placeholder wrapper.
 * @param zoomingEnabled Enables in-place pan/zoom instead of click-to-fullscreen.
 */
const Mermaid: React.FC<MermaidProps> = ({ chart, className = '', zoomingEnabled = false }) => {
  const [svg, setSvg] = useState<string>('');
  const [error, setError] = useState<string | null>(null);
  const [isFullscreen, setIsFullscreen] = useState(false);
  const containerRef = useRef<HTMLDivElement>(null);
  // Stable per-instance element id handed to mermaid.render()
  const idRef = useRef(`mermaid-${Math.random().toString(36).substring(2, 9)}`);
  // Sampled once at mount from the OS-level color scheme preference
  const isDarkModeRef = useRef(
    typeof window !== 'undefined' &&
    window.matchMedia &&
    window.matchMedia('(prefers-color-scheme: dark)').matches
  );

  // Initialize pan-zoom functionality when SVG is rendered
  useEffect(() => {
    if (!svg || !zoomingEnabled || !containerRef.current) return;

    let cancelled = false;
    let panZoomInstance: { destroy: () => void } | null = null;

    const initializePanZoom = async () => {
      const svgElement = containerRef.current?.querySelector("svg");
      if (!svgElement) return;

      // Remove any max-width constraints
      svgElement.style.maxWidth = "none";
      svgElement.style.width = "100%";
      svgElement.style.height = "100%";

      try {
        // Dynamically import svg-pan-zoom only when needed in the browser
        const svgPanZoom = (await import("svg-pan-zoom")).default;

        // The effect may have been torn down while the import was in flight
        if (cancelled) return;

        panZoomInstance = svgPanZoom(svgElement, {
          zoomEnabled: true,
          controlIconsEnabled: true,
          fit: true,
          center: true,
          minZoom: 0.1,
          maxZoom: 10,
          zoomScaleSensitivity: 0.3,
        });
      } catch (error) {
        console.error("Failed to load svg-pan-zoom:", error);
      }
    };

    // Wait for the SVG to be rendered
    const initTimer = setTimeout(() => {
      void initializePanZoom();
    }, 100);

    return () => {
      // Without this the pending timer could still fire after unmount / re-render and the
      // previous svg-pan-zoom instance (with its listeners) would never be released.
      cancelled = true;
      clearTimeout(initTimer);
      try {
        panZoomInstance?.destroy();
      } catch {
        // Best effort: the SVG node may already have been replaced or detached.
      }
    };
  }, [svg, zoomingEnabled]);

  // (Re)render the diagram whenever the definition changes
  useEffect(() => {
    if (!chart) return;

    let isMounted = true;

    const renderChart = async () => {
      try {
        setError(null);
        setSvg('');

        // Render the chart directly without preprocessing
        const { svg: renderedSvg } = await mermaid.render(idRef.current, chart);

        if (!isMounted) return;

        // Stamp the SVG root so the [data-theme="dark"] rules in themeCSS apply
        const themedSvg = isDarkModeRef.current
          ? renderedSvg.replace('<svg ', '<svg data-theme="dark" ')
          : renderedSvg;

        setSvg(themedSvg);

        // Call mermaid.contentLoaded to ensure proper initialization
        setTimeout(() => {
          mermaid.contentLoaded();
        }, 50);
      } catch (err) {
        console.error('Mermaid rendering error:', err);

        if (isMounted) {
          const errorMessage = err instanceof Error ? err.message : String(err);
          // The diagram source itself is rendered by the error branch below through React,
          // which escapes it; no imperative innerHTML write is needed here.
          setError(`Failed to render diagram: ${errorMessage}`);
        }
      }
    };

    void renderChart();

    return () => {
      isMounted = false;
    };
  }, [chart]);

  const handleDiagramClick = () => {
    if (!error && svg) {
      setIsFullscreen(true);
    }
  };

  if (error) {
    return (
      <div className={`border border-[var(--highlight)]/30 rounded-md p-4 bg-[var(--highlight)]/5 ${className}`}>
        <div className="flex items-center mb-3">
          <div className="text-[var(--highlight)] text-xs font-medium flex items-center">
            <svg xmlns="http://www.w3.org/2000/svg" className="h-4 w-4 mr-2" fill="none" viewBox="0 0 24 24" stroke="currentColor">
              <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-3L13.732 4c-.77-1.333-2.694-1.333-3.464 0L3.34 16c-.77 1.333.192 3 1.732 3z" />
            </svg>
            図表レンダリングエラー
          </div>
        </div>
        <div className="text-xs overflow-auto">
          <div className="text-red-500 dark:text-red-400 text-xs mb-1">Syntax error in diagram</div>
          <pre className="text-xs overflow-auto p-2 bg-gray-100 dark:bg-gray-800 rounded">{chart}</pre>
        </div>
        <div className="mt-3 text-xs text-[var(--muted)] font-serif">
          図表に構文エラーがあり、レンダリングできません。
        </div>
      </div>
    );
  }

  if (!svg) {
    return (
      <div className={`flex justify-center items-center p-4 ${className}`}>
        <div className="flex items-center space-x-2">
          <div className="w-2 h-2 bg-[var(--accent-primary)]/70 rounded-full animate-pulse"></div>
          <div className="w-2 h-2 bg-[var(--accent-primary)]/70 rounded-full animate-pulse delay-75"></div>
          <div className="w-2 h-2 bg-[var(--accent-primary)]/70 rounded-full animate-pulse delay-150"></div>
          <span className="text-[var(--muted)] text-xs ml-2 font-serif">図表を描画中...</span>
        </div>
      </div>
    );
  }

  return (
    <>
      <div
        ref={containerRef}
        className={`w-full max-w-full ${zoomingEnabled ? "h-[600px] p-4" : ""}`}
      >
        <div
          className={`relative group ${zoomingEnabled ? "h-full rounded-lg border-2 border-black" : ""}`}
        >
          <div
            className={`flex justify-center overflow-auto text-center my-2 cursor-pointer hover:shadow-md transition-shadow duration-200 rounded-md ${className} ${zoomingEnabled ? "h-full" : ""}`}
            dangerouslySetInnerHTML={{ __html: svg }}
            onClick={zoomingEnabled ? undefined : handleDiagramClick}
            title={zoomingEnabled ? undefined : "Click to view fullscreen"}
          />

          {/* Hover hint; only relevant in click-to-fullscreen mode */}
          {!zoomingEnabled && (
            <div className="absolute top-2 right-2 bg-gray-700/70 dark:bg-gray-900/70 text-white p-1.5 rounded-md opacity-0 group-hover:opacity-100 transition-opacity duration-200 flex items-center gap-1.5 text-xs shadow-md pointer-events-none">
              <svg xmlns="http://www.w3.org/2000/svg" width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round">
                <circle cx="11" cy="11" r="8"></circle>
                <line x1="21" y1="21" x2="16.65" y2="16.65"></line>
                <line x1="11" y1="8" x2="11" y2="14"></line>
                <line x1="8" y1="11" x2="14" y2="11"></line>
              </svg>
              <span>Click to zoom</span>
            </div>
          )}
        </div>
      </div>

      {!zoomingEnabled && (
        <FullScreenModal
          isOpen={isFullscreen}
          onClose={() => setIsFullscreen(false)}
        >
          <div dangerouslySetInnerHTML={{ __html: svg }} />
        </FullScreenModal>
      )}
    </>
  );
};


export default Mermaid;

================================================
FILE: src/components/ModelSelectionModal.tsx
================================================
'use client';

import React, {useEffect, useState} from 'react';
import {useLanguage} from '@/contexts/LanguageContext';
import UserSelector from './UserSelector';
import WikiTypeSelector from './WikiTypeSelector';
import TokenInput from './TokenInput';

/**
 * Props for ModelSelectionModal.
 *
 * Each value/setter pair is owned by the parent. The modal edits local copies and
 * only calls the setters when the user presses Submit.
 */
interface ModelSelectionModalProps {
  /** Whether the modal is rendered; the component returns null when false. */
  isOpen: boolean;
  /** Called by the header close button, the Cancel button, and after a successful apply. */
  onClose: () => void;
  provider: string;
  setProvider: (value: string) => void;
  model: string;
  setModel: (value: string) => void;
  isCustomModel: boolean;
  setIsCustomModel: (value: boolean) => void;
  customModel: string;
  setCustomModel: (value: string) => void;
  /** Invoked on Submit; receives the entered access token only when showTokenInput is true. */
  onApply: (token?: string) => void;

  // Wiki type options
  isComprehensiveView: boolean;
  setIsComprehensiveView: (value: boolean) => void;

  // File filter options - optional
  excludedDirs?: string;
  setExcludedDirs?: (value: string) => void;
  excludedFiles?: string;
  setExcludedFiles?: (value: string) => void;
  includedDirs?: string;
  setIncludedDirs?: (value: string) => void;
  includedFiles?: string;
  setIncludedFiles?: (value: string) => void;
  /** Forwarded to UserSelector; the filter setters are only passed down when true. */
  showFileFilters?: boolean;
  /** Controls whether the WikiTypeSelector is rendered (component default: true). */
  showWikiType: boolean;
  
  // Token input for refresh
  showTokenInput?: boolean;
  /** Initial platform for the token input (component default: 'github'). */
  repositoryType?: 'github' | 'gitlab' | 'bitbucket';
  // Authentication
  /** When true (and not loading) the authorization code field is shown. */
  authRequired?: boolean;
  authCode?: string;
  setAuthCode?: (code: string) => void;
  /** While true a "Loading authentication status..." notice replaces the code field. */
  isAuthLoading?: boolean;
}

/**
 * Modal for choosing the provider/model, wiki type, optional file filters, an optional
 * access token (refresh flow) and an optional authorization code.
 *
 * All edits are staged in local state that is re-seeded from props whenever the modal is
 * open and one of those props changes. Submit pushes the staged values to the parent
 * setters, calls `onApply` (with the token only when `showTokenInput` is true) and then
 * `onClose`. Cancel and the header close button just call `onClose`, discarding the edits.
 * The authorization code is the exception: it is written straight through `setAuthCode`.
 */
export default function ModelSelectionModal({
  isOpen,
  onClose,
  provider,
  setProvider,
  model,
  setModel,
  isCustomModel,
  setIsCustomModel,
  customModel,
  setCustomModel,
  onApply,
  isComprehensiveView,
  setIsComprehensiveView,
  excludedDirs = '',
  setExcludedDirs,
  excludedFiles = '',
  setExcludedFiles,
  includedDirs = '',
  setIncludedDirs,
  includedFiles = '',
  setIncludedFiles,
  showFileFilters = false,
  authRequired = false,
  authCode = '',
  setAuthCode,
  isAuthLoading,
  showWikiType = true,
  showTokenInput = false,
  repositoryType = 'github',
}: ModelSelectionModalProps) {
  const { messages: t } = useLanguage();

  // Local state for form values (to only apply changes when the user clicks "Submit")
  const [localProvider, setLocalProvider] = useState(provider);
  const [localModel, setLocalModel] = useState(model);
  const [localIsCustomModel, setLocalIsCustomModel] = useState(isCustomModel);
  const [localCustomModel, setLocalCustomModel] = useState(customModel);
  const [localIsComprehensiveView, setLocalIsComprehensiveView] = useState(isComprehensiveView);
  const [localExcludedDirs, setLocalExcludedDirs] = useState(excludedDirs);
  const [localExcludedFiles, setLocalExcludedFiles] = useState(excludedFiles);
  const [localIncludedDirs, setLocalIncludedDirs] = useState(includedDirs);
  const [localIncludedFiles, setLocalIncludedFiles] = useState(includedFiles);

  // Token input state
  const [localAccessToken, setLocalAccessToken] = useState('');
  const [localSelectedPlatform, setLocalSelectedPlatform] = useState<'github' | 'gitlab' | 'bitbucket'>(repositoryType);
  const [showTokenSection, setShowTokenSection] = useState(showTokenInput);

  // Reset local state when modal is opened
  useEffect(() => {
    if (isOpen) {
      setLocalProvider(provider);
      setLocalModel(model);
      setLocalIsCustomModel(isCustomModel);
      setLocalCustomModel(customModel);
      setLocalIsComprehensiveView(isComprehensiveView);
      setLocalExcludedDirs(excludedDirs);
      setLocalExcludedFiles(excludedFiles);
      setLocalIncludedDirs(includedDirs);
      setLocalIncludedFiles(includedFiles);
      setLocalSelectedPlatform(repositoryType);
      // The token is never pre-filled; it must be re-entered on every opening
      setLocalAccessToken('');
      setShowTokenSection(showTokenInput);
    }
  }, [isOpen, provider, model, isCustomModel, customModel, isComprehensiveView, excludedDirs, excludedFiles, includedDirs, includedFiles, repositoryType, showTokenInput]);

  // Handler for applying changes
  const handleApply = () => {
    setProvider(localProvider);
    setModel(localModel);
    setIsCustomModel(localIsCustomModel);
    setCustomModel(localCustomModel);
    setIsComprehensiveView(localIsComprehensiveView);
    // Filter setters are optional props; skip the ones the parent did not provide
    setExcludedDirs?.(localExcludedDirs);
    setExcludedFiles?.(localExcludedFiles);
    setIncludedDirs?.(localIncludedDirs);
    setIncludedFiles?.(localIncludedFiles);

    // Pass token to onApply if needed
    if (showTokenInput) {
      onApply(localAccessToken);
    } else {
      onApply();
    }
    onClose();
  };

  if (!isOpen) return null;

  return (
    <div className="fixed inset-0 z-50 overflow-y-auto">
      <div className="flex min-h-screen items-center justify-center p-4 text-center bg-black/50">
        <div className="relative transform overflow-hidden rounded-lg bg-[var(--card-bg)] text-left shadow-xl transition-all sm:my-8 sm:max-w-lg sm:w-full">
          {/* Modal header with close button */}
          <div className="flex items-center justify-between px-6 py-4 border-b border-[var(--border-color)]">
            <h3 className="text-lg font-medium text-[var(--accent-primary)]">
              <span className="text-[var(--accent-primary)]">{t.form?.modelSelection || 'Model Selection'}</span>
            </h3>
            {/* Icon-only button: aria-label gives it an accessible name */}
            <button
              type="button"
              onClick={onClose}
              aria-label="Close"
              className="text-[var(--muted)] hover:text-[var(--foreground)] focus:outline-none transition-colors"
            >
              <svg className="h-5 w-5" fill="none" viewBox="0 0 24 24" stroke="currentColor">
                <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" />
              </svg>
            </button>
          </div>

          {/* Modal body */}
          <div className="p-6">
            {/* Wiki Type Selector; its divider is only drawn when the selector itself is shown */}
            {showWikiType && (
              <>
                <WikiTypeSelector
                  isComprehensiveView={localIsComprehensiveView}
                  setIsComprehensiveView={setLocalIsComprehensiveView}
                />
                <div className="my-4 border-t border-[var(--border-color)]/30"></div>
              </>
            )}

            {/* Model Selector */}
            <UserSelector
              provider={localProvider}
              setProvider={setLocalProvider}
              model={localModel}
              setModel={setLocalModel}
              isCustomModel={localIsCustomModel}
              setIsCustomModel={setLocalIsCustomModel}
              customModel={localCustomModel}
              setCustomModel={setLocalCustomModel}
              showFileFilters={showFileFilters}
              excludedDirs={localExcludedDirs}
              setExcludedDirs={showFileFilters ? (value: string) => setLocalExcludedDirs(value) : undefined}
              excludedFiles={localExcludedFiles}
              setExcludedFiles={showFileFilters ? (value: string) => setLocalExcludedFiles(value) : undefined}
              includedDirs={localIncludedDirs}
              setIncludedDirs={showFileFilters ? (value: string) => setLocalIncludedDirs(value) : undefined}
              includedFiles={localIncludedFiles}
              setIncludedFiles={showFileFilters ? (value: string) => setLocalIncludedFiles(value) : undefined}
            />

            {/* Token Input Section for refresh */}
            {showTokenInput && (
              <>
                <div className="my-4 border-t border-[var(--border-color)]/30"></div>
                <TokenInput
                  selectedPlatform={localSelectedPlatform}
                  setSelectedPlatform={setLocalSelectedPlatform}
                  accessToken={localAccessToken}
                  setAccessToken={setLocalAccessToken}
                  showTokenSection={showTokenSection}
                  onToggleTokenSection={() => setShowTokenSection(!showTokenSection)}
                  allowPlatformChange={false} // Don't allow platform change during refresh
                />
              </>
            )}
            {/* Authorization Code Input */}
            {isAuthLoading && (
                <div className="mb-4 p-3 bg-[var(--background)]/50 rounded-md border border-[var(--border-color)] text-sm text-[var(--muted)]">
                  Loading authentication status...
                </div>
            )}
            {!isAuthLoading && authRequired && (
                <div className="mb-4 p-4 bg-[var(--background)]/50 rounded-md border border-[var(--border-color)]">
                  <label htmlFor="authCode" className="block text-sm font-medium text-[var(--foreground)] mb-2">
                    {t.form?.authorizationCode || 'Authorization Code'}
                  </label>
                  <input
                      type="password"
                      id="authCode"
                      value={authCode || ''}
                      onChange={(e) => setAuthCode?.(e.target.value)}
                      className="input-japanese block w-full px-3 py-2 text-sm rounded-md bg-transparent text-[var(--foreground)] focus:outline-none focus:border-[var(--accent-primary)]"
                      placeholder="Enter your authorization code"
                  />
                  <div className="flex items-center mt-2 text-xs text-[var(--muted)]">
                    <svg xmlns="http://www.w3.org/2000/svg" className="h-4 w-4 mr-1 text-[var(--muted)]"
                         fill="none" viewBox="0 0 24 24" stroke="currentColor">
                      <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2}
                            d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
                    </svg>
                    {t.form?.authorizationRequired || 'Authentication is required to generate the wiki.'}
                  </div>
                </div>
            )}
          </div>

          {/* Modal footer */}
          <div className="flex items-center justify-end gap-2 px-6 py-4 border-t border-[var(--border-color)]">
            <button
              type="button"
              onClick={onClose}
              className="px-4 py-2 text-sm font-medium rounded-md border border-[var(--border-color)]/50 text-[var(--muted)] bg-transparent hover:bg-[var(--background)] hover:text-[var(--foreground)] transition-colors"
            >
              {t.common?.cancel || 'Cancel'}
            </button>
            <button
              type="button"
              onClick={handleApply}
              className="px-4 py-2 text-sm font-medium rounded-md border border-transparent bg-[var(--accent-primary)]/90 text-white hover:bg-[var(--accent-primary)] transition-colors"
            >
              {t.common?.submit || 'Submit'}
            </button>
          </div>
        </div>
      </div>
    </div>
  );
}


================================================
FILE: src/components/ProcessedProjects.tsx
================================================
'use client';

import React, { useState, useEffect, useMemo } from 'react';
import Link from 'next/link';
import { FaTimes, FaTh, FaList } from 'react-icons/fa';

/**
 * One processed wiki project as delivered by `/api/wiki/projects`.
 * Keep this shape in sync with the API response.
 */
interface ProcessedProject {
  /** Identifier used as the React list key and to remove the entry after a delete. */
  id: string;
  /** Display name rendered as the card/list heading. */
  name: string;
  /** First path segment of the wiki link (`/{owner}/{repo}`). */
  owner: string;
  /** Second path segment of the wiki link (`/{owner}/{repo}`). */
  repo: string;
  /** Sent as the `type` query parameter of the wiki link and shown as a badge. */
  repo_type: string;
  /** Sent as the `language` query parameter of the wiki link. */
  language: string;
  /** Numeric timestamp handed to `new Date(...)` when rendering the processed date. */
  submittedAt: number;
}

/** Props accepted by the `ProcessedProjects` list component. */
interface ProcessedProjectsProps {
  /** Extra class names applied to the component's root element. */
  className?: string;
  /** Upper bound on how many projects are rendered; omitted means no limit. */
  maxItems?: number;
  /** Whether the title / "Back to Home" header is rendered. */
  showHeader?: boolean;
  /** Translation messages, grouped by namespace and then by message key. */
  messages?: Record<string, Record<string, string>>;
}

/**
 * Lists the wiki projects returned by `GET /api/wiki/projects`.
 *
 * Provides a client-side search box, a card/list view toggle and a per-project
 * delete action (`DELETE /api/wiki/projects`).
 *
 * @param showHeader - Render the title and "Back to Home" link (default `true`).
 * @param maxItems - When truthy, caps the number of projects rendered.
 * @param className - Extra class names for the root element.
 * @param messages - Optional translations; keys are read from `messages.projects`
 *   and fall back to the built-in English strings.
 */
export default function ProcessedProjects({
  showHeader = true,
  maxItems,
  className = "",
  messages
}: ProcessedProjectsProps) {
  const [projects, setProjects] = useState<ProcessedProject[]>([]);
  const [isLoading, setIsLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);
  const [searchQuery, setSearchQuery] = useState('');
  const [viewMode, setViewMode] = useState<'card' | 'list'>('card');

  // Default messages fallback
  const defaultMessages = {
    title: 'Processed Wiki Projects',
    searchPlaceholder: 'Search projects by name, owner, or repository...',
    noProjects: 'No projects found in the server cache. The cache might be empty or the server encountered an issue.',
    noSearchResults: 'No projects match your search criteria.',
    processedOn: 'Processed on:',
    loadingProjects: 'Loading projects...',
    errorLoading: 'Error loading projects:',
    backToHome: 'Back to Home'
  };

  // Resolve a message key: caller-supplied translation, then English default, then the key itself.
  const t = (key: string) => {
    if (messages?.projects?.[key]) {
      return messages.projects[key];
    }
    return defaultMessages[key as keyof typeof defaultMessages] || key;
  };

  // Load the project list once on mount.
  useEffect(() => {
    // Guards against state updates after the component has unmounted.
    let isActive = true;

    const fetchProjects = async () => {
      setIsLoading(true);
      setError(null);
      try {
        const response = await fetch('/api/wiki/projects');
        if (!response.ok) {
          throw new Error(`Failed to fetch projects: ${response.statusText}`);
        }
        const data = await response.json();
        if (data?.error) {
          throw new Error(data.error);
        }
        // Anything other than an array would crash `.filter` / `.slice` during render.
        if (!Array.isArray(data)) {
          throw new Error('Unexpected response format from /api/wiki/projects.');
        }
        if (isActive) {
          setProjects(data as ProcessedProject[]);
        }
      } catch (e: unknown) {
        console.error("Failed to load projects from API:", e);
        if (isActive) {
          const message = e instanceof Error ? e.message : "An unknown error occurred.";
          setError(message);
          setProjects([]);
        }
      } finally {
        if (isActive) {
          setIsLoading(false);
        }
      }
    };

    fetchProjects();

    return () => {
      isActive = false;
    };
  }, []);

  // Filter projects based on search query
  const filteredProjects = useMemo(() => {
    // Trim so leading/trailing whitespace in the box does not defeat matching.
    const query = searchQuery.trim().toLowerCase();
    if (!query) {
      return maxItems ? projects.slice(0, maxItems) : projects;
    }

    // Tolerate entries that are missing a field instead of throwing on `.toLowerCase()`.
    const matches = (value?: string) => (value ?? '').toLowerCase().includes(query);
    const filtered = projects.filter(project =>
      matches(project.name) ||
      matches(project.owner) ||
      matches(project.repo) ||
      matches(project.repo_type)
    );

    return maxItems ? filtered.slice(0, maxItems) : filtered;
  }, [projects, searchQuery, maxItems]);

  const clearSearch = () => {
    setSearchQuery('');
  };

  // Build the wiki link for a project; query values are escaped via URLSearchParams.
  const projectHref = (project: ProcessedProject) => {
    const query = new URLSearchParams({
      type: project.repo_type,
      language: project.language,
    });
    return `/${project.owner}/${project.repo}?${query.toString()}`;
  };

  // Ask for confirmation, delete the project on the server, then drop it from local state.
  const handleDelete = async (project: ProcessedProject) => {
    if (!confirm(`Are you sure you want to delete project ${project.name}?`)) {
      return;
    }
    try {
      const response = await fetch('/api/wiki/projects', {
        method: 'DELETE',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          owner: project.owner,
          repo: project.repo,
          repo_type: project.repo_type,
          language: project.language,
        }),
      });
      if (!response.ok) {
        // The error body may not be JSON; fall back to the HTTP status text.
        const errorBody = await response.json().catch(() => ({ error: response.statusText }));
        throw new Error(errorBody.error || response.statusText);
      }
      setProjects(prev => prev.filter(p => p.id !== project.id));
    } catch (e: unknown) {
      console.error('Failed to delete project:', e);
      alert(`Failed to delete project: ${e instanceof Error ? e.message : 'Unknown error'}`);
    }
  };

  return (
    <div className={`${className}`}>
      {showHeader && (
        <header className="mb-6">
          <div className="flex items-center justify-between">
            <h1 className="text-3xl font-bold text-[var(--accent-primary)]">{t('title')}</h1>
            <Link href="/" className="text-[var(--accent-primary)] hover:underline">
              {t('backToHome')}
            </Link>
          </div>
        </header>
      )}

      {/* Search Bar and View Toggle */}
      <div className="mb-6 flex flex-col sm:flex-row gap-4">
        {/* Search Bar */}
        <div className="relative flex-1">
          <input
            type="text"
            value={searchQuery}
            onChange={(e) => setSearchQuery(e.target.value)}
            placeholder={t('searchPlaceholder')}
            className="input-japanese block w-full pl-4 pr-12 py-2.5 border border-[var(--border-color)] rounded-lg bg-[var(--background)] text-[var(--foreground)] placeholder:text-[var(--muted)] focus:outline-none focus:border-[var(--accent-primary)] focus:ring-1 focus:ring-[var(--accent-primary)]"
          />
          {searchQuery && (
            <button
              type="button"
              onClick={clearSearch}
              aria-label="Clear search"
              className="absolute inset-y-0 right-0 flex items-center pr-3 text-[var(--muted)] hover:text-[var(--foreground)] transition-colors"
            >
              <FaTimes className="h-4 w-4" />
            </button>
          )}
        </div>

        {/* View Toggle */}
        <div className="flex items-center bg-[var(--background)] border border-[var(--border-color)] rounded-lg p-1">
          <button
            type="button"
            onClick={() => setViewMode('card')}
            className={`p-2 rounded transition-colors ${
              viewMode === 'card'
                ? 'bg-[var(--accent-primary)] text-white'
                : 'text-[var(--muted)] hover:text-[var(--foreground)] hover:bg-[var(--card-bg)]'
            }`}
            title="Card View"
            aria-label="Card View"
            aria-pressed={viewMode === 'card'}
          >
            <FaTh className="h-4 w-4" />
          </button>
          <button
            type="button"
            onClick={() => setViewMode('list')}
            className={`p-2 rounded transition-colors ${
              viewMode === 'list'
                ? 'bg-[var(--accent-primary)] text-white'
                : 'text-[var(--muted)] hover:text-[var(--foreground)] hover:bg-[var(--card-bg)]'
            }`}
            title="List View"
            aria-label="List View"
            aria-pressed={viewMode === 'list'}
          >
            <FaList className="h-4 w-4" />
          </button>
        </div>
      </div>

      {isLoading && <p className="text-[var(--muted)]">{t('loadingProjects')}</p>}
      {error && <p className="text-[var(--highlight)]">{t('errorLoading')} {error}</p>}

      {!isLoading && !error && filteredProjects.length > 0 && (
        <div className={viewMode === 'card' ? 'grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4' : 'space-y-2'}>
          {filteredProjects.map((project) => (
            viewMode === 'card' ? (
              <div key={project.id} className="relative p-4 border border-[var(--border-color)] rounded-lg bg-[var(--card-bg)] shadow-sm hover:shadow-md transition-all duration-200 hover:scale-[1.02]">
                <button
                  type="button"
                  onClick={() => handleDelete(project)}
                  className="absolute top-2 right-2 text-[var(--muted)] hover:text-[var(--foreground)]"
                  title="Delete project"
                  aria-label="Delete project"
                >
                  <FaTimes className="h-4 w-4" />
                </button>
                <Link
                  href={projectHref(project)}
                  className="block"
                >
                  <h3 className="text-lg font-semibold text-[var(--link-color)] hover:underline mb-2 line-clamp-2">
                    {project.name}
                  </h3>
                  <div className="flex flex-wrap gap-2 mb-3">
                    <span className="px-2 py-1 text-xs bg-[var(--accent-primary)]/10 text-[var(--accent-primary)] rounded-full border border-[var(--accent-primary)]/20">
                      {project.repo_type}
                    </span>
                    <span className="px-2 py-1 text-xs bg-[var(--background)] text-[var(--muted)] rounded-full border border-[var(--border-color)]">
                      {project.language}
                    </span>
                  </div>
                  <p className="text-xs text-[var(--muted)]">
                    {t('processedOn')} {new Date(project.submittedAt).toLocaleDateString()}
                  </p>
                </Link>
              </div>
            ) : (
              <div key={project.id} className="relative p-3 border border-[var(--border-color)] rounded-lg bg-[var(--card-bg)] hover:bg-[var(--background)] transition-colors">
                <button
                  type="button"
                  onClick={() => handleDelete(project)}
                  className="absolute top-2 right-2 text-[var(--muted)] hover:text-[var(--foreground)]"
                  title="Delete project"
                  aria-label="Delete project"
                >
                  <FaTimes className="h-4 w-4" />
                </button>
                <Link
                  href={projectHref(project)}
                  className="flex items-center justify-between"
                >
                  <div className="flex-1 min-w-0">
                    <h3 className="text-base font-medium text-[var(--link-color)] hover:underline truncate">
                      {project.name}
                    </h3>
                    <p className="text-xs text-[var(--muted)] mt-1">
                      {t('processedOn')} {new Date(project.submittedAt).toLocaleDateString()} • {project.repo_type} • {project.language}
                    </p>
                  </div>
                  <div className="flex gap-2 ml-4">
                    <span className="px-2 py-1 text-xs bg-[var(--accent-primary)]/10 text-[var(--accent-primary)] rounded border border-[var(--accent-primary)]/20">
                      {project.repo_type}
                    </span>
                  </div>
                </Link>
              </div>
            )
          ))}
        </div>
      )}

      {!isLoading && !error && projects.length > 0 && filteredProjects.length === 0 && searchQuery && (
        <p className="text-[var(--muted)]">{t('noSearchResults')}</p>
      )}

      {!isLoading && !error && projects.length === 0 && (
        <p className="text-[var(--muted)]">{t('noProjects')}</p>
      )}
    </div>
  );
}


================================================
FILE: src/components/TokenInput.tsx
================================================
'use client';

import React from 'react';
import { useLanguage } from '@/contexts/LanguageContext';

/** Props for the access-token entry widget (`TokenInput`). */
interface TokenInputProps {
  /** Platform the token belongs to; also selects which platform button is highlighted. */
  selectedPlatform: 'github' | 'gitlab' | 'bitbucket';
  /** Called with the platform whose button was clicked. */
  setSelectedPlatform: (value: TokenInputProps['selectedPlatform']) => void;
  /** Current value of the token input. */
  accessToken: string;
  /** Called with the new input value on every change. */
  setAccessToken: (value: string) => void;
  /** Whether the platform picker is rendered inside the token section. */
  allowPlatformChange?: boolean;
  /** Whether the token section (picker + input) is rendered. */
  showTokenSection?: boolean;
  /** When provided, a show/hide toggle button is rendered and wired to this callback. */
  onToggleTokenSection?: () => void;
}

/**
 * Access-token entry widget: an optional show/hide toggle, an optional platform
 * picker (GitHub / GitLab / Bitbucket) and a password input for the token.
 *
 * @param selectedPlatform - Currently selected platform.
 * @param setSelectedPlatform - Called when a platform button is clicked.
 * @param accessToken - Current token value.
 * @param setAccessToken - Called with the new value on every input change.
 * @param showTokenSection - Render the picker and input (default `true`).
 * @param onToggleTokenSection - When provided, renders the show/hide toggle button.
 * @param allowPlatformChange - Render the platform picker (default `true`).
 */
export default function TokenInput({
  selectedPlatform,
  setSelectedPlatform,
  accessToken,
  setAccessToken,
  showTokenSection = true,
  onToggleTokenSection,
  allowPlatformChange = true
}: TokenInputProps) {
  const { messages: t } = useLanguage();

  // Single source of truth for the picker buttons and for the brand-correct
  // display name substituted into `{platform}` placeholders.
  const platforms: { id: TokenInputProps['selectedPlatform']; label: string }[] = [
    { id: 'github', label: 'GitHub' },
    { id: 'gitlab', label: 'GitLab' },
    { id: 'bitbucket', label: 'Bitbucket' },
  ];

  // Naive capitalisation would yield "Github"/"Gitlab"; prefer the brand label
  // and only fall back to capitalisation for an id that is not in the table.
  const platformName =
    platforms.find((platform) => platform.id === selectedPlatform)?.label ??
    (selectedPlatform.charAt(0).toUpperCase() + selectedPlatform.slice(1));

  const selectedButtonClasses = 'bg-[var(--accent-primary)]/10 border-[var(--accent-primary)] text-[var(--accent-primary)] shadow-sm';
  const idleButtonClasses = 'border-[var(--border-color)] text-[var(--foreground)] hover:bg-[var(--background)]';

  return (
    <div className="mb-4">
      {onToggleTokenSection && (
        <button
          type="button"
          onClick={onToggleTokenSection}
          className="text-sm text-[var(--accent-primary)] hover:text-[var(--highlight)] flex items-center transition-colors border-b border-[var(--border-color)] hover:border-[var(--accent-primary)] pb-0.5 mb-2"
        >
          {showTokenSection
            ? (t.form?.hideTokens || 'Hide Access Tokens')
            : (t.form?.addTokens || 'Add Access Tokens for Private Repositories')}
        </button>
      )}

      {showTokenSection && (
        <div className="mt-2 p-4 bg-[var(--background)]/50 rounded-md border border-[var(--border-color)]">
          {allowPlatformChange && (
            <div className="mb-3">
              <label className="block text-xs font-medium text-[var(--foreground)] mb-2">
                {t.form?.selectPlatform || 'Select Platform'}
              </label>
              <div className="flex gap-2">
                {platforms.map(({ id, label }) => (
                  <button
                    key={id}
                    type="button"
                    onClick={() => setSelectedPlatform(id)}
                    aria-pressed={selectedPlatform === id}
                    className={`flex-1 flex items-center justify-center gap-2 px-3 py-2 rounded-md border transition-all ${
                      selectedPlatform === id ? selectedButtonClasses : idleButtonClasses
                    }`}
                  >
                    <span className="text-sm">{label}</span>
                  </button>
                ))}
              </div>
            </div>
          )}

          <div>
            <label htmlFor="access-token" className="block text-xs font-medium text-[var(--foreground)] mb-2">
              {(t.form?.personalAccessToken || 'Personal Access Token').replace('{platform}', platformName)}
            </label>
            <input
              id="access-token"
              type="password"
              value={accessToken}
              onChange={(e) => setAccessToken(e.target.value)}
              placeholder={(t.form?.tokenPlaceholder || 'Enter your access token').replace('{platform}', platformName)}
              className="input-japanese block w-full px-3 py-2 rounded-md bg-transparent text-[var(--foreground)] focus:outline-none focus:border-[var(--accent-primary)] text-sm"
            />
            <div className="flex items-center mt-2 text-xs text-[var(--muted)]">
              <svg xmlns="http://www.w3.org/2000/svg" className="h-4 w-4 mr-1 text-[var(--muted)]"
                fill="none" viewBox="0 0 24 24" stroke="currentColor">
                <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2}
                  d="M13 16h-1v-4h-1m1-4h.01M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
              </svg>
              {t.form?.tokenSecurityNote || 'Your token is stored locally and never sent to our servers.'}
            </div>
          </div>
        </div>
      )}
    </div>
  );
}

================================================
FILE: src/components/UserSelector.tsx
================================================
'use client';

import React, { useState, useEffect } from 'react';
import { useLanguage } from '@/contexts/LanguageContext';

// Shapes of the model configuration served by `/api/models/config`.

/** A single selectable model. */
interface Model {
  /** Human-readable label rendered as the `<option>` text. */
  name: string;
  /** Value stored via `setModel` when this model is chosen. */
  id: string;
}

/** A model provider together with the models it offers. */
interface Provider {
  /** Value stored via `setProvider`; also used to build the `provider<Id>` translation key. */
  id: string;
  /** Fallback label when no translation exists for this provider. */
  name: string;
  /** When truthy, the "Use custom model" toggle is shown for this provider. */
  supportsCustomModel?: boolean;
  /** Models offered by this provider; the first entry is used as the default. */
  models: Array<Model>;
}

/** Top-level payload of the model configuration endpoint. */
interface ModelConfig {
  /** Id of the provider to preselect when the caller has not chosen one. */
  defaultProvider: string;
  /** All providers available for selection. */
  providers: Array<Provider>;
}

/** Props for the provider/model selector (`UserSelector`). */
interface ModelSelectorProps {
  // --- Provider / model selection (controlled by the parent) ---
  /** Selected provider id; an empty value lets the component apply the API default. */
  provider: string;
  setProvider: (value: string) => void;
  /** Selected model id (or the custom model name while custom mode is active). */
  model: string;
  setModel: (value: string) => void;
  /** Free-text model name shown in the custom-model input. */
  customModel: string;
  setCustomModel: (value: string) => void;
  /** Whether the free-text input replaces the model dropdown. */
  isCustomModel: boolean;
  setIsCustomModel: (value: boolean) => void;

  // --- File filter configuration (only rendered when `showFileFilters` is true) ---
  showFileFilters?: boolean;
  /** Newline-separated lists edited in "Include Only Paths" mode. */
  includedDirs?: string;
  setIncludedDirs?: (value: string) => void;
  includedFiles?: string;
  setIncludedFiles?: (value: string) => void;
  /** Newline-separated lists edited in "Exclude Paths" mode. */
  excludedDirs?: string;
  setExcludedDirs?: (value: string) => void;
  excludedFiles?: string;
  setExcludedFiles?: (value: string) => void;
}

/**
 * Provider/model picker with an optional "Advanced Options" section for
 * include/exclude path filters.
 *
 * The list of providers and models is fetched once from `/api/models/config`.
 * All selection state is owned by the parent and passed in as value/setter pairs.
 *
 * @param provider / setProvider - Selected provider id and its setter.
 * @param model / setModel - Selected model id and its setter.
 * @param isCustomModel / setIsCustomModel - Whether a free-text model name is used.
 * @param customModel / setCustomModel - The free-text model name.
 * @param showFileFilters - Render the advanced file-filter section (default `false`).
 * @param excludedDirs, excludedFiles, includedDirs, includedFiles - Newline-separated
 *   filter lists (default `''`) with optional setters.
 */
export default function UserSelector({
  provider,
  setProvider,
  model,
  setModel,
  isCustomModel,
  setIsCustomModel,
  customModel,
  setCustomModel,

  // File filter configuration
  showFileFilters = false,
  excludedDirs = '',
  setExcludedDirs,
  excludedFiles = '',
  setExcludedFiles,
  includedDirs = '',
  setIncludedDirs,
  includedFiles = '',
  setIncludedFiles
}: ModelSelectorProps) {
  // State to manage the visibility of the filters modal and filter section
  const [isFilterSectionOpen, setIsFilterSectionOpen] = useState(false);
  // State to manage filter mode: 'exclude' or 'include'
  const [filterMode, setFilterMode] = useState<'exclude' | 'include'>('exclude');
  const { messages: t } = useLanguage();

  // State for model configurations from backend
  const [modelConfig, setModelConfig] = useState<ModelConfig | null>(null);
  const [isLoading, setIsLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);

  // State for viewing default values
  const [showDefaultDirs, setShowDefaultDirs] = useState(false);
  const [showDefaultFiles, setShowDefaultFiles] = useState(false);

  // Fetch model configurations from the backend once on mount. This must not
  // depend on `provider`: refetching on every provider change would flip
  // `isLoading` and replace the whole form with the loading placeholder.
  useEffect(() => {
    // Guards against state updates after the component has unmounted.
    let isActive = true;

    const fetchModelConfig = async () => {
      try {
        setIsLoading(true);
        setError(null);

        const response = await fetch('/api/models/config');

        if (!response.ok) {
          throw new Error(`Error fetching model configurations: ${response.status}`);
        }

        const data = await response.json();
        // Rendering dereferences `providers`; reject payloads that lack it.
        if (!data || !Array.isArray(data.providers)) {
          throw new Error('Malformed model configuration response');
        }
        if (isActive) {
          setModelConfig(data);
        }
      } catch (err) {
        console.error('Failed to fetch model configurations:', err);
        if (isActive) {
          setError('Failed to load model configurations. Using default options.');
        }
      } finally {
        if (isActive) {
          setIsLoading(false);
        }
      }
    };

    fetchModelConfig();

    return () => {
      isActive = false;
    };
  }, []);

  // Initialize provider and model with defaults from the API if the parent has
  // not selected a provider yet.
  useEffect(() => {
    if (!modelConfig || provider || !modelConfig.defaultProvider) {
      return;
    }
    setProvider(modelConfig.defaultProvider);

    // Find the default provider and set its default (first) model
    const defaultEntry = modelConfig.providers.find(
      (p: Provider) => p.id === modelConfig.defaultProvider
    );
    if (defaultEntry?.models?.length) {
      setModel(defaultEntry.models[0].id);
    }
  }, [modelConfig, provider, setModel, setProvider]);

  // Handler for changing provider
  const handleProviderChange = (newProvider: string) => {
    setProvider(newProvider);
    // NOTE(review): the 10 ms deferral is kept from the original implementation;
    // presumably it lets the parent's provider update settle first — confirm
    // whether it is still needed.
    setTimeout(() => {
      // Reset custom model state when changing providers
      setIsCustomModel(false);

      // Set default model for the selected provider
      if (modelConfig) {
        const selectedProvider = modelConfig.providers.find((p: Provider) => p.id === newProvider);
        if (selectedProvider && selectedProvider.models.length > 0) {
          setModel(selectedProvider.models[0].id);
        }
      }
    }, 10);
  };

  // Flip custom-model mode; when turning it on, seed the text input with the
  // currently selected model. Shared by the switch and its label.
  const toggleCustomModel = () => {
    const newValue = !isCustomModel;
    setIsCustomModel(newValue);
    if (newValue) {
      setCustomModel(model);
    }
  };

  // Default excluded directories from config.py
  const defaultExcludedDirs =
`./.venv/
./venv/
./env/
./virtualenv/
./node_modules/
./bower_components/
./jspm_packages/
./.git/
./.svn/
./.hg/
./.bzr/
./__pycache__/
./.pytest_cache/
./.mypy_cache/
./.ruff_cache/
./.coverage/
./dist/
./build/
./out/
./target/
./bin/
./obj/
./docs/
./_docs/
./site-docs/
./_site/
./.idea/
./.vscode/
./.vs/
./.eclipse/
./.settings/
./logs/
./log/
./tmp/
./temp/
./.eng`;

  // Default excluded files from config.py
  const defaultExcludedFiles =
`package-lock.json
yarn.lock
pnpm-lock.yaml
npm-shrinkwrap.json
poetry.lock
Pipfile.lock
requirements.txt.lock
Cargo.lock
composer.lock
.lock
.DS_Store
Thumbs.db
desktop.ini
*.lnk
.env
.env.*
*.env
*.cfg
*.ini
.flaskenv
.gitignore
.gitattributes
.gitmodules
.github
.gitlab-ci.yml
.prettierrc
.eslintrc
.eslintignore
.stylelintrc
.editorconfig
.jshintrc
.pylintrc
.flake8
mypy.ini
pyproject.toml
tsconfig.json
webpack.config.js
babel.config.js
rollup.config.js
jest.config.js
karma.conf.js
vite.config.js
next.config.js
*.min.js
*.min.css
*.bundle.js
*.bundle.css
*.map
*.gz
*.zip
*.tar
*.tgz
*.rar
*.pyc
*.pyo
*.pyd
*.so
*.dll
*.class
*.exe
*.o
*.a
*.jpg
*.jpeg
*.png
*.gif
*.ico
*.svg
*.webp
*.mp3
*.mp4
*.wav
*.avi
*.mov
*.webm
*.csv
*.tsv
*.xls
*.xlsx
*.db
*.sqlite
*.sqlite3
*.pdf
*.docx
*.pptx`;

  // Display loading state
  if (isLoading) {
    return (
      <div className="flex flex-col gap-2">
        <div className="text-sm text-[var(--muted)]">Loading model configurations...</div>
      </div>
    );
  }

  // Configuration entry for the currently selected provider (if any).
  const activeProvider = modelConfig?.providers.find((p: Provider) => p.id === provider);

  return (
    <div className="flex flex-col gap-3">
      <div className="space-y-4">
        {error && (
          <div className="text-sm text-red-500 mb-2">{error}</div>
        )}

        {/* Provider Selection */}
        <div>
          <label htmlFor="provider-dropdown" className="block text-xs font-medium text-[var(--foreground)] mb-1.5">
            {t.form?.modelProvider || 'Model Provider'}
          </label>
          <select
            id="provider-dropdown"
            value={provider}
            onChange={(e) => handleProviderChange(e.target.value)}
            className="input-japanese block w-full px-2.5 py-1.5 text-sm rounded-md bg-transparent text-[var(--foreground)] focus:outline-none focus:border-[var(--accent-primary)]"
          >
            <option value="" disabled>{t.form?.selectProvider || 'Select Provider'}</option>
            {modelConfig?.providers.map((providerOption) => (
              <option key={providerOption.id} value={providerOption.id}>
                {t.form?.[`provider${providerOption.id.charAt(0).toUpperCase() + providerOption.id.slice(1)}`] || providerOption.name}
              </option>
            ))}
          </select>
        </div>

        {/* Model Selection - consistent height regardless of type */}
        <div>
          <label htmlFor={isCustomModel ? "custom-model-input" : "model-dropdown"} className="block text-xs font-medium text-[var(--foreground)] mb-1.5">
            {t.form?.modelSelection || 'Model Selection'}
          </label>

          {isCustomModel ? (
            <input
              id="custom-model-input"
              type="text"
              value={customModel}
              onChange={(e) => {
                setCustomModel(e.target.value);
                setModel(e.target.value);
              }}
              placeholder={t.form?.customModelPlaceholder || 'Enter custom model name'}
              className="input-japanese block w-full px-2.5 py-1.5 text-sm rounded-md bg-transparent text-[var(--foreground)] focus:outline-none focus:border-[var(--accent-primary)]"
            />
          ) : (
            <select
              id="model-dropdown"
              value={model}
              onChange={(e) => setModel(e.target.value)}
              className="input-japanese block w-full px-2.5 py-1.5 text-sm rounded-md bg-transparent text-[var(--foreground)] focus:outline-none focus:border-[var(--accent-primary)]"
              disabled={!provider || isLoading || !activeProvider?.models?.length}
            >
              {/* An empty array is truthy, so test the length to make the placeholder reachable. */}
              {activeProvider?.models?.length ? (
                activeProvider.models.map((modelOption) => (
                  <option key={modelOption.id} value={modelOption.id}>
                    {modelOption.name}
                  </option>
                ))
              ) : (
                <option value="">{t.form?.selectModel || 'Select Model'}</option>
              )}
            </select>
          )}
        </div>

        {/* Custom model toggle - only when provider supports it */}
        {activeProvider?.supportsCustomModel && (
          <div className="mb-2">
            <div className="flex items-center pb-1">
              <div
                className="relative flex items-center cursor-pointer"
                onClick={toggleCustomModel}
              >
                {/* Visually hidden checkbox mirrors the state; clicks are handled by the wrappers. */}
                <input
                  id="use-custom-model"
                  type="checkbox"
                  checked={isCustomModel}
                  onChange={() => {}}
                  className="sr-only"
                />
                <div className={`w-10 h-5 rounded-full transition-colors ${isCustomModel ? 'bg-[var(--accent-primary)]' : 'bg-gray-300 dark:bg-gray-600'}`}></div>
                <div className={`absolute left-0.5 top-0.5 w-4 h-4 rounded-full bg-white transition-transform transform ${isCustomModel ? 'translate-x-5' : ''}`}></div>
              </div>
              <label
                htmlFor="use-custom-model"
                className="ml-2 text-sm font-medium text-[var(--muted)] cursor-pointer"
                onClick={(e) => {
                  // Prevent the native label->checkbox activation; toggle explicitly instead.
                  e.preventDefault();
                  toggleCustomModel();
                }}
              >
                {t.form?.useCustomModel || 'Use custom model'}
              </label>
            </div>
          </div>
        )}

        {showFileFilters && (
          <div className="mt-4">
            <button
              type="button"
              onClick={() => setIsFilterSectionOpen(!isFilterSectionOpen)}
              className="flex items-center text-sm text-[var(--accent-primary)] hover:text-[var(--accent-primary)]/80 transition-colors"
            >
              <span className="mr-1.5 text-xs">{isFilterSectionOpen ? '▼' : '►'}</span>
              {t.form?.advancedOptions || 'Advanced Options'}
            </button>

            {isFilterSectionOpen && (
              <div className="mt-3 p-3 border border-[var(--border-color)]/70 rounded-md bg-[var(--background)]/30">
                {/* Filter Mode Selection */}
                <div className="mb-4">
                  <label className="block text-sm font-medium text-[var(--foreground)] mb-2">
                    {t.form?.filterMode || 'Filter Mode'}
                  </label>
                  <div className="flex gap-2">
                    <button
                      type="button"
                      onClick={() => setFilterMode('exclude')}
                      className={`flex-1 px-3 py-2 rounded-md border text-sm transition-colors ${
                        filterMode === 'exclude'
                          ? 'bg-[var(--accent-primary)]/10 border-[var(--accent-primary)] text-[var(--accent-primary)]'
                          : 'border-[var(--border-color)] text-[var(--foreground)] hover:bg-[var(--background)]'
                      }`}
                    >
                      {t.form?.excludeMode || 'Exclude Paths'}
                    </button>
                    <button
                      type="button"
                      onClick={() => setFilterMode('include')}
                      className={`flex-1 px-3 py-2 rounded-md border text-sm transition-colors ${
                        filterMode === 'include'
                          ? 'bg-[var(--accent-primary)]/10 border-[var(--accent-primary)] text-[var(--accent-primary)]'
                          : 'border-[var(--border-color)] text-[var(--foreground)] hover:bg-[var(--background)]'
                      }`}
                    >
                      {t.form?.includeMode || 'Include Only Paths'}
                    </button>
                  </div>
                  <p className="text-xs text-[var(--muted)] mt-1">
                    {filterMode === 'exclude'
                      ? (t.form?.excludeModeDescription || 'Specify paths to exclude from processing (default behavior)')
                      : (t.form?.includeModeDescription || 'Specify only the paths to include, ignoring all others')
                    }
                  </p>
                </div>

                {/* Directories Section */}
                <div className="mb-4">
                  <label className="block text-sm font-medium text-[var(--muted)] mb-1.5">
                    {filterMode === 'exclude'
                      ? (t.form?.excludedDirs || 'Excluded Directories')
                      : (t.form?.includedDirs || 'Included Directories')
                    }
                  </label>
                  <textarea
                    value={filterMode === 'exclude' ? excludedDirs : includedDirs}
                    onChange={(e) => {
                      if (filterMode === 'exclude') {
                        setExcludedDirs?.(e.target.value);
                      } else {
                        setIncludedDirs?.(e.target.value);
                      }
                    }}
                    rows={4}
                    className="block w-full rounded-md border border-[var(--border-color)]/50 bg-[var(--input-bg)] text-[var(--foreground)] px-3 py-2 text-sm focus:border-[var(--accent-primary)] focus:ring-1 focus:ring-opacity-50 shadow-sm"
                    placeholder={filterMode === 'exclude'
                      ? (t.form?.enterExcludedDirs || 'Enter excluded directories, one per line...')
                      : (t.form?.enterIncludedDirs || 'Enter included directories, one per line...')
                    }
                  />
                  {filterMode === 'exclude' && (
                    <>
                      <div className="flex mt-1.5">
                        <button
                          type="button"
                          onClick={() => setShowDefaultDirs(!showDefaultDirs)}
                          className="text-xs text-[var(--accent-primary)] hover:text-[var(--accent-primary)]/80 transition-colors"
                        >
                          {showDefaultDirs ? (t.form?.hideDefault || 'Hide Default') : (t.form?.viewDefault || 'View Default')}
                        </button>
                      </div>
                      {showDefaultDirs && (
                        <div className="mt-2 p-2 rounded bg-[var(--background)]/50 text-xs">
                          <p className="mb-1 text-[var(--muted)]">{t.form?.defaultNote || 'These defaults are already applied. Add your custom exclusions above.'}</p>
                          <pre className="whitespace-pre-wrap font-mono text-[var(--muted)] overflow-y-auto max-h-32">{defaultExcludedDirs}</pre>
                        </div>
                      )}
                    </>
                  )}
                </div>

                {/* Files Section */}
                <div>
                  <label className="block text-sm font-medium text-[var(--muted)] mb-1.5">
                    {filterMode === 'exclude'
                      ? (t.form?.excludedFiles || 'Excluded Files')
                      : (t.form?.includedFiles || 'Included Files')
                    }
                  </label>
                  <textarea
                    value={filterMode === 'exclude' ? excludedFiles : includedFiles}
                    onChange={(e) => {
                      if (filterMode === 'exclude') {
                        setExcludedFiles?.(e.target.value);
                      } else {
                        setIncludedFiles?.(e.target.value);
                      }
                    }}
                    rows={4}
                    className="block w-full rounded-md border border-[var(--border-color)]/50 bg-[var(--input-bg)] text-[var(--foreground)] px-3 py-2 text-sm focus:border-[var(--accent-primary)] focus:ring-1 focus:ring-opacity-50 shadow-sm"
                    placeholder={filterMode === 'exclude'
                      ? (t.form?.enterExcludedFiles || 'Enter excluded files, one per line...')
                      : (t.form?.enterIncludedFiles || 'Enter included files, one per line...')
                    }
                  />
                  {filterMode === 'exclude' && (
                    <>
                      <div className="flex mt-1.5">
                        <button
                          type="button"
                          onClick={() => setShowDefaultFiles(!showDefaultFiles)}
                          className="text-xs text-[var(--accent-primary)] hover:text-[var(--accent-primary)]/80 transition-colors"
                        >
                          {showDefaultFiles ? (t.form?.hideDefault || 'Hide Default') : (t.form?.viewDefault || 'View Default')}
                        </button>
                      </div>
                      {showDefaultFiles && (
                        <div className="mt-2 p-2 rounded bg-[var(--background)]/50 text-xs">
                          <p className="mb-1 text-[var(--muted)]">{t.form?.defaultNote || 'These defaults are already applied. Add your custom exclusions above.'}</p>
                          <pre className="whitespace-pre-wrap font-mono text-[var(--muted)] overflow-y-auto max-h-32">{defaultExcludedFiles}</pre>
                        </div>
                      )}
                    </>
                  )}
                </div>
              </div>
            )}
          </div>
        )}
      </div>
    </div>
  );
}


================================================
FILE: src/components/WikiTreeView.tsx
================================================
'use client';

import React, { useState } from 'react';
import { FaChevronRight, FaChevronDown } from 'react-icons/fa';

// Wiki data shapes used by this component (declared locally in this file).

// A single wiki page entry.
interface WikiPage {
  // Identifier that sections reference through their `pages` list.
  id: string;
  // Label rendered in the navigation tree.
  title: string;
  content: string;
  filePaths: string[];
  // Selects the color of the dot rendered next to the page title.
  importance: 'high' | 'medium' | 'low';
  relatedPages: string[];
  parentId?: string;
  isSection?: boolean;
  children?: string[];
}

// A collapsible group of pages in the tree.
interface WikiSection {
  id: string;
  // Label rendered on the section's toggle button.
  title: string;
  // Page ids; each one is resolved against WikiStructure.pages when rendering.
  pages: string[];
  // Ids of nested sections; rendered recursively one level deeper.
  subsections?: string[];
}

// The whole wiki: pages plus the section hierarchy.
interface WikiStructure {
  id: string;
  title: string;
  description: string;
  pages: WikiPage[];
  sections: WikiSection[];
  // Ids of the sections rendered at the top level; these start out expanded.
  rootSections: string[];
}

interface WikiTreeViewProps {
  wikiStructure: WikiStructure;
  // Id of the page to highlight as selected, if any.
  currentPageId: string | undefined;
  // Called with the page id when a page button is clicked.
  onPageSelect: (pageId: string) => void;
  // NOTE(review): declared here but not destructured or read by WikiTreeView.
  messages?: {
    pages?: string;
    [key: string]: string | undefined;
  };
}

/**
 * Navigation tree for a generated wiki.
 *
 * Renders `wikiStructure.rootSections` as collapsible sections (root sections
 * start expanded) with their pages and nested subsections. When the structure
 * has no sections or no root sections, renders every page as a flat list.
 *
 * @param wikiStructure  Pages plus the section hierarchy to display.
 * @param currentPageId  Id of the page to highlight as selected.
 * @param onPageSelect   Called with the page id when a page is clicked.
 */
const WikiTreeView: React.FC<WikiTreeViewProps> = ({
  wikiStructure,
  currentPageId,
  onPageSelect,
}) => {
  const [expandedSections, setExpandedSections] = useState<Set<string>>(
    new Set(wikiStructure.rootSections)
  );

  // Flip a section between expanded and collapsed without mutating the previous Set.
  const toggleSection = (sectionId: string, event: React.MouseEvent) => {
    event.stopPropagation();
    setExpandedSections(prev => {
      const next = new Set(prev);
      if (next.has(sectionId)) {
        next.delete(sectionId);
      } else {
        next.add(sectionId);
      }
      return next;
    });
  };

  // Background class for the importance indicator dot.
  const importanceDotClass = (importance: WikiPage['importance']) => {
    if (importance === 'high') return 'bg-[#9b7cb9]';
    if (importance === 'medium') return 'bg-[#d7c4bb]';
    return 'bg-[#e8927c]';
  };

  // Shared page button used by both the tree view and the flat fallback list.
  const renderPageButton = (page: WikiPage, verticalPadding: string) => {
    const isCurrent = currentPageId === page.id;
    return (
      <button
        key={page.id}
        type="button"
        aria-current={isCurrent ? 'page' : undefined}
        className={`w-full text-left px-3 ${verticalPadding} rounded-md text-sm transition-colors ${
          isCurrent
            ? 'bg-[var(--accent-primary)]/20 text-[var(--accent-primary)] border border-[var(--accent-primary)]/30'
            : 'text-[var(--foreground)] hover:bg-[var(--background)] border border-transparent'
        }`}
        onClick={() => onPageSelect(page.id)}
      >
        <div className="flex items-center">
          <div
            className={`w-2 h-2 rounded-full mr-2 flex-shrink-0 ${importanceDotClass(page.importance)}`}
          ></div>
          <span className="truncate">{page.title}</span>
        </div>
      </button>
    );
  };

  // Render one section and, when expanded, its pages and subsections.
  // `ancestors` holds the ids on the path from the root so that a section
  // listing itself (or one of its parents) as a subsection cannot recurse forever.
  const renderSection = (
    sectionId: string,
    level = 0,
    ancestors: string[] = []
  ): React.ReactNode => {
    if (ancestors.includes(sectionId)) {
      console.warn(`WikiTreeView: Skipping cyclic subsection reference to ${sectionId}`);
      return null;
    }

    const section = wikiStructure.sections.find(s => s.id === sectionId);
    if (!section) {
      console.warn(`WikiTreeView: Could not find section with id ${sectionId}`);
      return null;
    }

    const isExpanded = expandedSections.has(sectionId);
    const path = [...ancestors, sectionId];

    return (
      <div key={sectionId} className="mb-2">
        <button
          type="button"
          aria-expanded={isExpanded}
          className={`flex items-center w-full text-left px-2 py-1.5 rounded-md text-sm font-medium text-[var(--foreground)] hover:bg-[var(--background)]/70 transition-colors ${
            level === 0 ? 'bg-[var(--background)]/50' : ''
          }`}
          onClick={(e) => toggleSection(sectionId, e)}
        >
          {isExpanded ? (
            <FaChevronDown className="mr-2 text-xs" />
          ) : (
            <FaChevronRight className="mr-2 text-xs" />
          )}
          <span className="truncate">{section.title}</span>
        </button>

        {isExpanded && (
          <div className={`ml-4 mt-1 space-y-1 ${level > 0 ? 'pl-2 border-l border-[var(--border-color)]/30' : ''}`}>
            {/* Pages in this section; ids without a matching page are skipped */}
            {section.pages?.map(pageId => {
              const page = wikiStructure.pages.find(p => p.id === pageId);
              return page ? renderPageButton(page, 'py-1.5') : null;
            })}

            {/* Subsections, one level deeper */}
            {section.subsections?.map(subsectionId =>
              renderSection(subsectionId, level + 1, path)
            )}
          </div>
        )}
      </div>
    );
  };

  // Without sections or root sections there is no hierarchy to show: list every page flat.
  const hasTree =
    Boolean(wikiStructure.sections) &&
    wikiStructure.sections.length > 0 &&
    Boolean(wikiStructure.rootSections) &&
    wikiStructure.rootSections.length > 0;

  if (!hasTree) {
    return (
      <ul className="space-y-2">
        {wikiStructure.pages.map(page => (
          <li key={page.id}>{renderPageButton(page, 'py-2')}</li>
        ))}
      </ul>
    );
  }

  return (
    <div className="space-y-1">
      {/* Arrow wrapper keeps map's index argument from being passed as `level` */}
      {wikiStructure.rootSections.map(sectionId => renderSection(sectionId))}
    </div>
  );
};

export default WikiTreeView;

================================================
FILE: src/components/WikiTypeSelector.tsx
================================================
'use client';

import React from 'react';
import { useLanguage } from '@/contexts/LanguageContext';
import { FaBookOpen, FaList } from 'react-icons/fa';

// Props for WikiTypeSelector.
interface WikiTypeSelectorProps {
  // true selects the "Comprehensive" option, false selects "Concise".
  isComprehensiveView: boolean;
  // Called with true/false when the corresponding option button is clicked.
  setIsComprehensiveView: (value: boolean) => void;
}

/**
 * Two-option picker between the "Comprehensive" and "Concise" wiki types.
 * The option matching `isComprehensiveView` is highlighted and shows a
 * filled indicator; clicking an option reports it via `setIsComprehensiveView`.
 */
const WikiTypeSelector: React.FC<WikiTypeSelectorProps> = ({
  isComprehensiveView,
  setIsComprehensiveView,
}) => {
  const { messages: t } = useLanguage();

  const activeClasses =
    'bg-[var(--accent-primary)]/10 border-[var(--accent-primary)]/30 text-[var(--accent-primary)]';
  const idleClasses =
    'bg-[var(--background)]/50 border-[var(--border-color)] text-[var(--foreground)] hover:bg-[var(--background)]';

  // One entry per button, in display order.
  const choices = [
    {
      comprehensive: true,
      Icon: FaBookOpen,
      heading: t.form?.comprehensive || 'Comprehensive',
      blurb: t.form?.comprehensiveDescription || 'Detailed wiki with structured sections and more pages',
    },
    {
      comprehensive: false,
      Icon: FaList,
      heading: t.form?.concise || 'Concise',
      blurb: t.form?.conciseDescription || 'Simplified wiki with fewer pages and essential information',
    },
  ];

  return (
    <div className="mb-4">
      <label className="block text-sm font-medium text-[var(--foreground)] mb-2">
        {t.form?.wikiType || 'Wiki Type'}
      </label>
      <div className="flex flex-col sm:flex-row gap-3">
        {choices.map(({ comprehensive, Icon, heading, blurb }) => {
          const isActive = isComprehensiveView === comprehensive;
          return (
            <button
              key={comprehensive ? 'comprehensive' : 'concise'}
              type="button"
              onClick={() => setIsComprehensiveView(comprehensive)}
              className={`flex items-center justify-between p-3 rounded-md border transition-colors ${
                isActive ? activeClasses : idleClasses
              }`}
            >
              <div className="flex items-center">
                <Icon className="mr-2" />
                <div className="text-left">
                  <div className="font-medium">{heading}</div>
                  <div className="text-xs opacity-80">{blurb}</div>
                </div>
              </div>
              {isActive && (
                <div className="ml-2 h-4 w-4 rounded-full bg-[var(--accent-primary)]/20 flex items-center justify-center">
                  <div className="h-2 w-2 rounded-full bg-[var(--accent-primary)]"></div>
                </div>
              )}
            </button>
          );
        })}
      </div>
    </div>
  );
};

export default WikiTypeSelector;


================================================
FILE: src/components/theme-toggle.tsx
================================================
"use client";

import { useTheme } from "next-themes";

/**
 * Button that switches between the light and dark themes.
 *
 * Uses `resolvedTheme` rather than `theme`: `theme` can be "system", in which
 * case comparing it to "dark" shows the wrong icon on a dark OS and makes the
 * first click a no-op. `resolvedTheme` reports the theme actually in effect.
 */
export default function ThemeToggle() {
  const { resolvedTheme, setTheme } = useTheme();
  const isDark = resolvedTheme === "dark";

  return (
    <button
      type="button"
      className="theme-toggle-button cursor-pointer bg-transparent border border-[var(--border-color)] text-[var(--foreground)] hover:border-[var(--accent-primary)] active:bg-[var(--accent-secondary)]/10 rounded-md p-2 transition-all duration-300"
      title="Toggle theme"
      aria-label="Toggle theme"
      onClick={() => setTheme(isDark ? "light" : "dark")}
    >
      {/* Japanese-inspired sun and moon icons */}
      <div className="relative w-5 h-5">
        {/* Sun icon (light mode) */}
        <div className={`absolute inset-0 transition-opacity duration-300 ${isDark ? 'opacity-0' : 'opacity-100'}`}>
          <svg viewBox="0 0 24 24" fill="none" className="w-5 h-5" aria-label="Light Mode">
            <circle cx="12" cy="12" r="5" stroke="currentColor" strokeWidth="2" />
            <path d="M12 2V4" stroke="currentColor" strokeWidth="2" strokeLinecap="round" />
            <path d="M12 20V22" stroke="currentColor" strokeWidth="2" strokeLinecap="round" />
            <path d="M4 12L2 12" stroke="currentColor" strokeWidth="2" strokeLinecap="round" />
            <path d="M22 12L20 12" stroke="currentColor" strokeWidth="2" strokeLinecap="round" />
            <path d="M19.778 4.22183L17.6569 6.34315" stroke="currentColor" strokeWidth="2" strokeLinecap="round" />
            <path d="M6.34309 17.6569L4.22177 19.7782" stroke="currentColor" strokeWidth="2" strokeLinecap="round" />
            <path d="M19.778 19.7782L17.6569 17.6569" stroke="currentColor" strokeWidth="2" strokeLinecap="round" />
            <path d="M6.34309 6.34315L4.22177 4.22183" stroke="currentColor" strokeWidth="2" strokeLinecap="round" />
          </svg>
        </div>

        {/* Moon icon (dark mode) */}
        <div className={`absolute inset-0 transition-opacity duration-300 ${isDark ? 'opacity-100' : 'opacity-0'}`}>
          <svg viewBox="0 0 24 24" fill="none" className="w-5 h-5" aria-label="Dark Mode">
            <path
              d="M21 12.79A9 9 0 1 1 11.21 3 7 7 0 0 0 21 12.79z"
              stroke="currentColor"
              strokeWidth="2"
              strokeLinecap="round"
              strokeLinejoin="round"
              fill="none"
            />
          </svg>
        </div>
      </div>
    </button>
  );
}


================================================
FILE: src/contexts/LanguageContext.tsx
================================================
/* eslint-disable @typescript-eslint/no-explicit-any */
'use client';

import React, { createContext, useContext, useState, useEffect, ReactNode } from 'react';
import { locales } from '@/i18n';

// Translation catalog loaded from a ../messages/<language>.json file.
type Messages = Record<string, any>;
// Value exposed to consumers through useLanguage().
type LanguageContextType = {
  // Code of the language whose messages are currently loaded.
  language: string;
  // Switches language; unsupported codes fall back to the default language.
  setLanguage: (lang: string) => void;
  // Messages for the current language.
  messages: Messages;
  // Language code -> display name.
  supportedLanguages: Record<string, string>;
};

// Starts as undefined so useLanguage() can detect use outside a LanguageProvider.
const LanguageContext = createContext<LanguageContextType | undefined>(undefined);

// Languages offered when /api/lang/config cannot be fetched or returns an unusable payload.
const FALLBACK_SUPPORTED_LANGUAGES: Record<string, string> = {
  "en": "English",
  "ja": "Japanese (日本語)",
  "zh": "Mandarin Chinese (中文)",
  "zh-tw": "Traditional Chinese (繁體中文)",
  "es": "Spanish (Español)",
  "kr": "Korean (한국어)",
  "vi": "Vietnamese (Tiếng Việt)",
  "pt-br": "Brazilian Portuguese (Português Brasileiro)",
  "fr": "Français (French)",
  "ru": "Русский (Russian)"
};

/**
 * Picks the supported language that best matches the browser locale.
 *
 * Matching order: exact locale ("pt-BR" -> "pt-br"), Traditional Chinese
 * regions/scripts -> "zh-tw", bare language code ("fr-CA" -> "fr"), the
 * project's "kr" code for Korean ("ko"), then any regional variant of the
 * same language ("pt-PT" -> "pt-br").
 *
 * @param supportedCodes Language codes to choose from; when empty, the static
 *                       `locales` list is used instead.
 * @returns A code from the candidate list, or 'en' when nothing matches, when
 *          not running in a browser, or when detection throws.
 */
function detectBrowserLanguage(supportedCodes: string[]): string {
  try {
    if (typeof window === 'undefined' || typeof navigator === 'undefined') {
      return 'en'; // Default to English on server-side
    }

    // navigator.language returns a full locale such as 'en-US'
    const browserLang: string = navigator.language || (navigator as any).userLanguage || '';
    console.log('Detected browser language:', browserLang);

    if (!browserLang) {
      return 'en'; // Default to English if browser language is not available
    }

    const candidates: string[] = supportedCodes.length > 0 ? supportedCodes : (locales as string[]);
    const fullLocale = browserLang.toLowerCase();
    const langCode = fullLocale.split('-')[0];

    // Exact locale match, needed for region-qualified codes like 'pt-br' and 'zh-tw'
    if (candidates.includes(fullLocale)) {
      return fullLocale;
    }

    // Traditional Chinese regions/scripts map to 'zh-tw' when it is available
    if (langCode === 'zh' && /-(tw|hk|mo|hant)\b/.test(fullLocale) && candidates.includes('zh-tw')) {
      return 'zh-tw';
    }

    if (candidates.includes(langCode)) {
      return langCode;
    }

    // Browsers report Korean as 'ko'; this project keys it as 'kr'
    if (langCode === 'ko' && candidates.includes('kr')) {
      return 'kr';
    }

    // Fall back to any regional variant of the same language
    const regionalVariant = candidates.find(code => code.startsWith(`${langCode}-`));
    if (regionalVariant) {
      return regionalVariant;
    }

    console.log('Language not supported, defaulting to English');
    return 'en';
  } catch (error) {
    console.error('Error detecting browser language:', error);
    return 'en'; // Default to English on error
  }
}

/**
 * Provides the current language, its messages and the supported-language list
 * to descendants via LanguageContext.
 *
 * On mount it fetches the supported languages from /api/lang/config (falling
 * back to a built-in list), then loads messages for the language stored in
 * localStorage or, if none is stored, the detected browser language. A
 * full-screen loading indicator is rendered until the first messages load.
 */
export function LanguageProvider({ children }: { children: ReactNode }) {
  // 'en' until the stored/detected language has been resolved and loaded
  const [language, setLanguageState] = useState<string>('en');
  const [messages, setMessages] = useState<Messages>({});
  const [isLoading, setIsLoading] = useState<boolean>(true);
  const [supportedLanguages, setSupportedLanguages] = useState<Record<string, string>>({});
  const [defaultLanguage, setDefaultLanguage] = useState('en');

  useEffect(() => {
    const getSupportedLanguages = async () => {
      try {
        const response = await fetch('/api/lang/config');
        if (!response.ok) {
          throw new Error(`HTTP error! status: ${response.status}`);
        }
        const data = await response.json();
        const supported = data?.supported_languages;
        // An empty or missing list would leave the provider stuck on the loading
        // screen (the load effect only runs once languages exist), so treat it as a failure.
        if (!supported || typeof supported !== 'object' || Object.keys(supported).length === 0) {
          throw new Error('Language config response has no supported_languages');
        }
        setSupportedLanguages(supported);
        setDefaultLanguage(typeof data.default === 'string' ? data.default : 'en');
      } catch (err) {
        console.error("Failed to fetch language config:", err);
        // Use the built-in list so the UI still renders when the config is unavailable
        setSupportedLanguages(FALLBACK_SUPPORTED_LANGUAGES);
        setDefaultLanguage("en");
      }
    };
    getSupportedLanguages();
  }, []);

  useEffect(() => {
    const supportedCodes = Object.keys(supportedLanguages);
    if (supportedCodes.length === 0) {
      return; // Wait until the supported languages are known
    }

    const loadLanguage = async () => {
      try {
        // Only access localStorage in the browser
        let storedLanguage: string | null;
        if (typeof window !== 'undefined') {
          storedLanguage = localStorage.getItem('language');

          // If no language is stored, detect browser language
          if (!storedLanguage) {
            console.log('No language in localStorage, detecting browser language');
            storedLanguage = detectBrowserLanguage(supportedCodes);

            // Store the detected language
            localStorage.setItem('language', storedLanguage);
          }
        } else {
          console.log('Running on server-side, using default language');
          storedLanguage = 'en';
        }

        console.log('Supported languages loaded, validating language:', storedLanguage);
        const validLanguage =
          storedLanguage && supportedCodes.includes(storedLanguage) ? storedLanguage : defaultLanguage;
        console.log('Valid language determined:', validLanguage);

        // Load messages for the language
        const langMessages = (await import(`../messages/${validLanguage}.json`)).default;

        setLanguageState(validLanguage);
        setMessages(langMessages);

        // Update HTML lang attribute (only in browser)
        if (typeof document !== 'undefined') {
          document.documentElement.lang = validLanguage;
        }
      } catch (error) {
        console.error('Failed to load language:', error);
        // Fallback to English, keeping the reported language in sync with the messages
        console.log('Falling back to English due to error');
        const enMessages = (await import('../messages/en.json')).default;
        setLanguageState('en');
        setMessages(enMessages);
      } finally {
        setIsLoading(false);
      }
    };

    loadLanguage();
  }, [supportedLanguages, defaultLanguage]);

  // Update language and load new messages
  const setLanguage = async (lang: string) => {
    try {
      console.log('Setting language to:', lang);
      const validLanguage = Object.keys(supportedLanguages).includes(lang) ? lang : defaultLanguage;

      // Load messages for the new language
      const langMessages = (await import(`../messages/${validLanguage}.json`)).default;

      setLanguageState(validLanguage);
      setMessages(langMessages);

      // Store in localStorage (only in browser)
      if (typeof window !== 'undefined') {
        localStorage.setItem('language', validLanguage);
      }

      // Update HTML lang attribute (only in browser)
      if (typeof document !== 'undefined') {
        document.documentElement.lang = validLanguage;
      }
    } catch (error) {
      console.error('Failed to set language:', error);
    }
  };

  if (isLoading) {
    return (
      <div className="flex items-center justify-center h-screen bg-gray-100 dark:bg-gray-900">
        <div className="text-center">
          <div className="animate-spin rounded-full h-12 w-12 border-t-2 border-b-2 border-purple-500 mx-auto mb-4"></div>
          <p className="text-gray-600 dark:text-gray-400">Loading...</p>
        </div>
      </div>
    );
  }

  return (
    <LanguageContext.Provider value={{ language, setLanguage, messages, supportedLanguages }}>
      {children}
    </LanguageContext.Provider>
  );
}

/**
 * Returns the value supplied by the nearest LanguageProvider.
 *
 * @throws Error when called from a component that is not inside a LanguageProvider.
 */
export function useLanguage() {
  const languageContext = useContext(LanguageContext);
  if (languageContext !== undefined) {
    return languageContext;
  }
  throw new Error('useLanguage must be used within a LanguageProvider');
}


================================================
FILE: src/hooks/useProcessedProjects.ts
================================================
import { useState, useEffect } from 'react';

// One entry of the list returned by the /api/wiki/projects endpoint.
interface ProcessedProject {
  id: string;
  owner: string;
  repo: string;
  name: string;
  repo_type: string;
  // NOTE(review): presumably an epoch timestamp — unit (s vs ms) not visible here; confirm against the API.
  submittedAt: number;
  language: string;
}

/**
 * Loads the list of processed wiki projects from /api/wiki/projects once on mount.
 *
 * @returns `projects` (empty until loaded or on failure), `isLoading`, and
 *          `error` (a message string, or null when the last load succeeded).
 */
export function useProcessedProjects() {
  const [projects, setProjects] = useState<ProcessedProject[]>([]);
  const [isLoading, setIsLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);

  useEffect(() => {
    // Lets the cleanup cancel the request and suppress state updates after unmount
    const controller = new AbortController();

    const fetchProjects = async () => {
      setIsLoading(true);
      setError(null);
      try {
        const response = await fetch('/api/wiki/projects', { signal: controller.signal });
        if (!response.ok) {
          throw new Error(`Failed to fetch projects: ${response.statusText}`);
        }
        const data = await response.json();
        if (data && data.error) {
          throw new Error(data.error);
        }
        // Consumers iterate over `projects`, so anything but an array is treated as an error
        if (!Array.isArray(data)) {
          throw new Error('Unexpected response format: expected a list of projects.');
        }
        setProjects(data as ProcessedProject[]);
      } catch (e: unknown) {
        if (controller.signal.aborted) {
          return; // Unmounted: the abort is expected and there is no state to update
        }
        console.error("Failed to load projects from API:", e);
        const message = e instanceof Error ? e.message : "An unknown error occurred.";
        setError(message);
        setProjects([]);
      } finally {
        if (!controller.signal.aborted) {
          setIsLoading(false);
        }
      }
    };

    fetchProjects();

    return () => controller.abort();
  }, []);

  return { projects, isLoading, error };
}


================================================
FILE: src/i18n.ts
================================================
import { getRequestConfig } from 'next-intl/server';

// Define the list of supported locales.
// Each entry is loaded as ./messages/<locale>.json. 'zh-tw', 'fr' and 'ru' are
// included because the language context's built-in language list offers them —
// TODO confirm the matching message files exist.
export const locales = ['en', 'ja', 'zh', 'zh-tw', 'es', 'kr', 'vi', 'pt-br', 'fr', 'ru'];

// Request config for next-intl: resolves the locale and loads its message catalog.
export default getRequestConfig(async ({ locale }) => {
  // Any locale outside the supported list is served in English
  let safeLocale = locale;
  if (!locales.includes(locale as string)) {
    safeLocale = 'en';
  }

  const catalog = await import(`./messages/${safeLocale}.json`);

  return {
    locale: safeLocale as string,
    messages: catalog.default
  };
});


================================================
FILE: src/messages/en.json
================================================
{
  "common": {
    "appName": "DeepWiki-Open",
    "tagline": "AI-powered documentation",
    "generateWiki": "Generate Wiki",
    "processing": "Processing...",
    "error": "Error",
    "submit": "Submit",
    "cancel": "Cancel",
    "close": "Close",
    "loading": "Loading..."
  },
  "loading": {
    "initializing": "Initializing wiki generation...",
    "fetchingStructure": "Fetching repository structure...",
    "determiningStructure": "Determining wiki structure...",
    "clearingCache": "Clearing server cache...",
    "preparingDownload": "Please wait while we prepare your download..."
  },
  "home": {
    "welcome": "Welcome to DeepWiki-Open",
    "welcomeTagline": "AI-powered documentation for your code repositories",
    "description": "Generate comprehensive documentation from GitHub, GitLab, or Bitbucket repositories with just a few clicks.",
    "quickStart": "Quick Start",
    "enterRepoUrl": "Enter a repository URL in one of these formats:",
    "advancedVisualization": "Advanced Visualization with Mermaid Diagrams",
    "diagramDescription": "DeepWiki automatically generates interactive diagrams to help you understand code structure and relationships:",
    "flowDiagram": "Flow Diagram",
    "sequenceDiagram": "Sequence Diagram"
  },
  "form": {
    "repository": "Repository",
    "configureWiki": "Configure Wiki",
    "repoPlaceholder": "owner/repo or GitHub/GitLab/Bitbucket URL",
    "wikiLanguage": "Wiki Language",
    "modelOptions": "Model Options",
    "modelProvider": "Model Provider",
    "modelSelection": "Model Selection",
    "wikiType": "Wiki Type",
    "comprehensive": "Comprehensive",
    "concise": "Concise",
    "comprehensiveDescription": "Detailed wiki with structured sections and more pages",
    "conciseDescription": "Simplified wiki with fewer pages and essential information",
    "providerGoogle": "Google",
    "providerOpenAI": "OpenAI",
    "providerOpenRouter": "OpenRouter",
    "providerOllama": "Ollama (Local)",
    "localOllama": "Local Ollama Model",
    "experimental": "Experimental",
    "useOpenRouter": "Use OpenRouter API",
    "openRouterModel": "OpenRouter Model",
    "useOpenai": "Use OpenAI API",
    "openaiModel": "OpenAI Model",
    "useCustomModel": "Use custom model",
    "customModelPlaceholder": "Enter custom model name",
    "addTokens": "+ Add access tokens for private repositories",
    "hideTokens": "- Hide access tokens",
    "accessToken": "Access Token for Private Repositories",
    "selectPlatform": "Select Platform",
    "personalAccessToken": "{platform} Personal Access Token",
    "tokenPlaceholder": "Enter your {platform} token",
    "tokenSecurityNote": "Token is stored in memory only and never persisted.",
    "defaultFiltersInfo": "Default filters include common directories like node_modules, .git, and common build artifact files.",
    "fileFilterTitle": "File Filter Configuration",
    "advancedOptions": "Advanced Options",
    "viewDefaults": "View Default Filters",
    "showFilters": "Show Filters",
    "hideFilters": "Hide Filters",
    "excludedDirs": "Directories to Exclude",
    "excludedDirsHelp": "One directory path per line. Paths starting with ./ are relative to repository root.",
    "enterExcludedDirs": "Enter excluded directories, one per line...",
    "excludedFiles": "Files to Exclude",
    "excludedFilesHelp": "One filename per line. Wildcards (*) are supported.",
    "enterExcludedFiles": "Enter excluded files, one per line...",
    "defaultFilters": "Default Excluded Files & Directories",
    "directories": "Directories",
    "files": "Files",
    "scrollToViewMore": "Scroll to view more",
    "changeModel": "Change Model",
    "defaultNote": "These defaults are already applied. Add your custom exclusions above.",
    "hideDefault": "Hide Default",
    "viewDefault": "View Default",
    "includedDirs": "Included Directories",
    "includedFiles": "Included Files",
    "enterIncludedDirs": "Enter included directories, one per line...",
    "enterIncludedFiles": "Enter included files, one per line...",
    "filterMode": "Filter Mode",
    "excludeMode": "Exclude Paths",
    "includeMode": "Include Only Paths",
    "excludeModeDescription": "Specify paths to exclude from processing (default behavior)",
    "includeModeDescription": "Specify only the paths to include, ignoring all others",
    "authorizationCode": "Authorization Code",
    "authorizationRequired": "Authentication is required to generate the wiki."
  },
  "footer": {
    "copyright": "DeepWiki - AI-powered documentation for code repositories"
  },
  "ask": {
    "placeholder": "Ask a question about this repository...",
    "askButton": "Ask",
    "deepResearch": "Deep Research",
    "researchInProgress": "Research in progress...",
    "continueResearch": "Continue Research",
    "viewPlan": "View Plan",
    "viewUpdates": "View Updates",
    "viewConclusion": "View Conclusion"
  },
  "repoPage": {
    "refreshWiki": "Refresh Wiki",
    "confirmRefresh": "Confirm Refresh",
    "cancel": "Cancel",
    "home": "Home",
    "errorTitle": "Error",
    "errorMessageDefault": "Please check that your repository exists and is public. Valid formats are \"owner/repo\", \"https://github.com/owner/repo\", \"https://gitlab.com/owner/repo\", \"https://bitbucket.org/owner/repo\", or local folder paths like \"C:\\path\\to\\folder\" or \"/path/to/folder\".",
    "embeddingErrorDefault": "This error is related to the document embedding system used for analyzing your repository. Please verify your embedding model configuration, API keys, and try again. If the issue persists, consider switching to a different embedding provider in the model settings.",
    "backToHome": "Back to Home",
    "exportWiki": "Export Wiki",
    "exportAsMarkdown": "Export as Markdown",
    "exportAsJson": "Export as JSON",
    "pages": "Pages",
    "relatedFiles": "Related Files:",
    "relatedPages": "Related Pages:",
    "selectPagePrompt": "Select a page from the navigation to view its content",
    "askAboutRepo": "Ask questions about this repository"
  },
  "nav": {
    "wikiProjects": "Wiki Projects"
  },
  "projects": {
    "title": "Processed Wiki Projects",
    "searchPlaceholder": "Search projects by name, owner, or repository...",
    "noProjects": "No projects found in the server cache. The cache might be empty or the server encountered an issue.",
    "noSearchResults": "No projects match your search criteria.",
    "processedOn": "Processed on:",
    "loadingProjects": "Loading projects...",
    "errorLoading": "Error loading projects:",
    "backToHome": "Back to Home",
    "browseExisting": "Browse Existing Projects",
    "existingProjects": "Existing Projects",
    "recentProjects": "Recent Projects"
  }
}


================================================
FILE: src/messages/es.json
================================================
{
  "common": {
    "appName": "DeepWiki-Open",
    "tagline": "Documentación impulsada por IA",
    "generateWiki": "Generar Wiki",
    "processing": "Procesando...",
    "error": "Error",
    "submit": "Enviar",
    "cancel": "Cancelar",
    "close": "Cerrar",
    "loading": "Cargando..."
  },
  "loading": {
    "initializing": "Inicializando generación de wiki...",
    "fetchingStructure": "Obteniendo estructura del repositorio...",
    "determiningStructure": "Determinando estructura del wiki...",
    "clearingCache": "Limpiando caché del servidor...",
    "preparingDownload": "Por favor espere mientras preparamos su descarga..."
  },
  "home": {
    "welcome": "Bienvenido a DeepWiki",
    "welcomeTagline": "Documentación impulsada por IA para repositorios de código",
    "description": "Genera documentación completa de repositorios GitHub, GitLab o Bitbucket con solo unos clics.",
    "quickStart": "Inicio Rápido",
    "enterRepoUrl": "Ingresa una URL de repositorio en uno de estos formatos:",
    "advancedVisualization": "Visualización Avanzada con Diagramas Mermaid",
    "diagramDescription": "DeepWiki genera automáticamente diagramas interactivos para ayudarte a entender la estructura y relaciones del código:",
    "flowDiagram": "Diagrama de Flujo",
    "sequenceDiagram": "Diagrama de Secuencia"
  },
  "form": {
    "repository": "Repositorio",
    "configureWiki": "Configurar Wiki",
    "repoPlaceholder": "propietario/repositorio o URL de GitHub/GitLab/Bitbucket",
    "wikiLanguage": "Idioma del Wiki",
    "modelOptions": "Opciones de Modelo",
    "modelProvider": "Proveedor de Modelo",
    "modelSelection": "Selección de Modelo",
    "wikiType": "Tipo de Wiki",
    "comprehensive": "Completo",
    "concise": "Conciso",
    "comprehensiveDescription": "Wiki detallado con secciones estructuradas y más páginas",
    "conciseDescription": "Wiki simplificado con menos páginas e información esencial",
    "providerGoogle": "Google",
    "providerOpenAI": "OpenAI",
    "providerOpenRouter": "OpenRouter",
    "providerOllama": "Ollama (Local)",
    "localOllama": "Modelo Ollama Local",
    "experimental": "Experimental",
    "useOpenRouter": "Usar API de OpenRouter",
    "openRouterModel": "Modelo OpenRouter",
    "useOpenai": "Usar API de OpenAI",
    "openaiModel": "Modelo OpenAI",
    "useCustomModel": "Usar modelo personalizado",
    "customModelPlaceholder": "Ingrese nombre de modelo personalizado",
    "addTokens": "+ Agregar tokens de acceso para repositorios privados",
    "hideTokens": "- Ocultar tokens de acceso",
    "accessToken": "Token de Acceso para Repositorios Privados",
    "selectPlatform": "Seleccionar Plataforma",
    "personalAccessToken": "Token de Acceso Personal de {platform}",
    "tokenPlaceholder": "Ingresa tu token de {platform}",
    "tokenSecurityNote": "El token solo se almacena en memoria y nunca se persiste.",
    "defaultFiltersInfo": "Los filtros predeterminados incluyen directorios comunes como node_modules, .git y archivos de artefactos de construcción comunes.",
    "fileFilterTitle": "Configuración de Filtros de Archivos",
    "advancedOptions": "Opciones Avanzadas",
    "viewDefaults": "Ver Filtros Predeterminados",
    "showFilters": "Mostrar Filtros",
    "hideFilters": "Ocultar Filtros",
    "excludedDirs": "Directorios a Excluir",
    "excludedDirsHelp": "Una ruta de directorio por línea. Las rutas que comienzan con ./ son relativas a la raíz del repositorio.",
    "enterExcludedDirs": "Ingrese directorios a excluir, uno por línea...",
    "excludedFiles": "Archivos a Excluir",
    "excludedFilesHelp": "Un nombre de archivo por línea. Se admiten comodines (*).",
    "enterExcludedFiles": "Ingrese archivos a excluir, uno por línea...",
    "defaultFilters": "Archivos y Directorios Excluidos por Defecto",
    "directories": "Directorios",
    "files": "Archivos",
    "scrollToViewMore": "Desplazar para ver más",
    "changeModel": "Cambiar Modelo",
    "defaultNote": "Estos valores predeterminados ya están aplicados. Agregue sus exclusiones personalizadas arriba.",
    "hideDefault": "Ocultar Predeterminados",
    "viewDefault": "Ver Predeterminados",
    "authorizationCode": "Código de Autorización",
    "authorizationRequired": "Generar Wiki requiere código de autorización"
  },
  "footer": {
    "copyright": "DeepWiki - Documentación impulsada por IA para repositorios de código"
  },
  "ask": {
    "placeholder": "Haz una pregunta sobre este repositorio...",
    "askButton": "Preguntar",
    "deepResearch": "Investigación Profunda",
    "researchInProgress": "Investigación en progreso...",
    "continueResearch": "Continuar Investigación",
    "viewPlan": "Ver Plan",
    "viewUpdates": "Ver Actualizaciones",
    "viewConclusion": "Ver Conclusión"
  },
  "repoPage": {
    "refreshWiki": "Actualizar Wiki",
    "confirmRefresh": "Confirmar Actualización",
    "cancel": "Cancelar",
    "home": "Inicio",
    "errorTitle": "Error",
    "errorMessageDefault": "Por favor, compruebe que su repositorio existe y es público. Los formatos válidos son \"owner/repo\", \"https://github.com/owner/repo\", \"https://gitlab.com/owner/repo\", \"https://bitbucket.org/owner/repo\", o rutas de carpetas locales como \"C:\\\\path\\\\to\\\\folder\" o \"/path/to/folder\".",
    "embeddingErrorDefault": "Este error está relacionado con el sistema de embebido utilizado para analizar su repositorio. Por favor, verifique la configuración del modelo de embebido, las claves de API y vuelva a intentarlo. Si el problema persiste, considere cambiar a un proveedor de embebido diferente en la configuración del modelo.",
    "backToHome": "Volver al Inicio",
    "exportWiki": "Exportar Wiki",
    "exportAsMarkdown": "Exportar como Markdown",
    "exportAsJson": "Exportar como JSON",
    "pages": "Páginas",
    "relatedFiles": "Archivos Relacionados:",
    "relatedPages": "Páginas Relacionadas:",
    "selectPagePrompt": "Seleccione una página de la navegación para ver su contenido",
    "askAboutRepo": "Hacer preguntas sobre este repositorio"
  },
  "nav": {
    "wikiProjects": "Lista de Proyectos"
  },
  "projects": {
    "title": "Proyectos Wiki Procesados",
    "searchPlaceholder": "Buscar proyectos por nombre, propietario o repositorio...",
    "noProjects": "No se encontraron proyectos en la caché del servidor. La caché podría estar vacía o el servidor encontró un problema.",
    "noSearchResults": "Ningún proyecto coincide con sus criterios de búsqueda.",
    "processedOn": "Procesado el:",
    "loadingProjects": "Cargando proyectos...",
    "errorLoading": "Error al cargar proyectos:",
    "backToHome": "Volver al Inicio",
    "browseExisting": "Explorar Proyectos Existentes",
    "existingProjects": "Proyectos Existentes",
    "recentProjects": "Proyectos Recientes"
  }
}


================================================
FILE: src/messages/fr.json
================================================
{
    "common": {
      "appName": "DeepWiki-Open",
      "tagline": "Documentation propulsée par l’IA",
      "generateWiki": "Générer un Wiki",
      "processing": "Traitement en cours...",
      "error": "Erreur",
      "submit": "Soumettre",
      "cancel": "Annuler",
      "close": "Fermer",
      "loading": "Chargement..."
    },
    "loading": {
      "initializing": "Initialisation de la génération du wiki...",
      "fetchingStructure": "Récupération de la structure du dépôt...",
      "determiningStructure": "Détermination de la structure du wiki...",
      "clearingCache": "Nettoyage du cache serveur...",
      "preparingDownload": "Veuillez patienter pendant que nous préparons votre téléchargement..."
    },
    "home": {
      "welcome": "Bienvenue sur DeepWiki-Open",
      "welcomeTagline": "Documentation propulsée par l’IA pour vos dépôts de code",
      "description": "Générez une documentation complète à partir de dépôts GitHub, GitLab ou Bitbucket en quelques clics.",
      "quickStart": "Démarrage rapide",
      "enterRepoUrl": "Entrez une URL de dépôt dans l’un des formats suivants :",
      "advancedVisualization": "Visualisation avancée avec des diagrammes Mermaid",
      "diagramDescription": "DeepWiki génère automatiquement des diagrammes interactifs pour vous aider à comprendre la structure du code et ses relations :",
      "flowDiagram": "Diagramme de flux",
      "sequenceDiagram": "Diagramme de séquence"
    },
    "form": {
      "repository": "Dépôt",
      "configureWiki": "Configurer le Wiki",
      "repoPlaceholder": "propriétaire/dépôt ou URL GitHub/GitLab/Bitbucket",
      "wikiLanguage": "Langue du Wiki",
      "modelOptions": "Options du Modèle",
      "modelProvider": "Fournisseur du Modèle",
      "modelSelection": "Sélection du Modèle",
      "wikiType": "Type de Wiki",
      "comprehensive": "Complet",
      "concise": "Concis",
      "comprehensiveDescription": "Wiki détaillé avec des sections structurées et plus de pages",
      "conciseDescription": "Wiki simplifié avec moins de pages et les informations essentielles",
      "providerGoogle": "Google",
      "providerOpenAI": "OpenAI",
      "providerOpenRouter": "OpenRouter",
      "providerOllama": "Ollama (Local)",
      "localOllama": "Modèle Ollama local",
      "experimental": "Expérimental",
      "useOpenRouter": "Utiliser l’API OpenRouter",
      "openRouterModel": "Modèle OpenRouter",
      "useOpenai": "Utiliser l’API OpenAI",
      "openaiModel": "Modèle OpenAI",
      "useCustomModel": "Utiliser un modèle personnalisé",
      "customModelPlaceholder": "Entrez le nom du modèle personnalisé",
      "addTokens": "+ Ajouter des jetons d’accès pour les dépôts privés",
      "hideTokens": "- Masquer les jetons d’accès",
      "accessToken": "Jeton d’accès pour les dépôts privés",
      "selectPlatform": "Sélectionnez une plateforme",
      "personalAccessToken": "Jeton d’accès personnel {platform}",
      "tokenPlaceholder": "Entrez votre jeton {platform}",
      "tokenSecurityNote": "Le jeton est stocké uniquement en mémoire et jamais sauvegardé.",
      "defaultFiltersInfo": "Les filtres par défaut incluent les répertoires courants comme node_modules, .git et les fichiers de build.",
      "fileFilterTitle": "Configuration du filtre de fichiers",
      "advancedOptions": "Options avancées",
      "viewDefaults": "Voir les filtres par défaut",
      "showFilters": "Afficher les filtres",
      "hideFilters": "Masquer les filtres",
      "excludedDirs": "Répertoires à exclure",
      "excludedDirsHelp": "Un chemin de répertoire par ligne. Les chemins commençant par ./ sont relatifs à la racine du dépôt.",
      "enterExcludedDirs": "Entrez les répertoires à exclure, un par ligne...",
      "excludedFiles": "Fichiers à exclure",
      "excludedFilesHelp": "Un nom de fichier par ligne. Les jokers (*) sont pris en charge.",
      "enterExcludedFiles": "Entrez les fichiers à exclure, un par ligne...",
      "defaultFilters": "Fichiers & répertoires exclus par défaut",
      "directories": "Répertoires",
      "files": "Fichiers",
      "scrollToViewMore": "Faites défiler pour en voir plus",
      "changeModel": "Changer de modèle",
      "defaultNote": "Ces valeurs par défaut sont déjà appliquées. Ajoutez vos exclusions personnalisées ci-dessus.",
      "hideDefault": "Masquer les valeurs par défaut",
      "viewDefault": "Afficher les valeurs par défaut",
      "includedDirs": "Répertoires inclus",
      "includedFiles": "Fichiers inclus",
      "enterIncludedDirs": "Entrez les répertoires à inclure, un par ligne...",
      "enterIncludedFiles": "Entrez les fichiers à inclure, un par ligne...",
      "filterMode": "Mode de filtrage",
      "excludeMode": "Exclure des chemins",
      "includeMode": "Inclure uniquement ces chemins",
      "excludeModeDescription": "Spécifie les chemins à exclure du traitement (comportement par défaut)",
      "includeModeDescription": "Spécifie uniquement les chemins à inclure, en ignorant les autres",
      "authorizationCode": "Code d’autorisation",
      "authorizationRequired": "Une authentification est requise pour générer le wiki."
    },
    "footer": {
      "copyright": "DeepWiki - Documentation assistée par l’IA pour les dépôts de code"
    },
    "ask": {
      "placeholder": "Posez une question sur ce dépôt...",
      "askButton": "Poser la question",
      "deepResearch": "Recherche approfondie",
      "researchInProgress": "Recherche en cours...",
      "continueResearch": "Continuer la recherche",
      "viewPlan": "Voir le plan",
      "viewUpdates": "Voir les mises à jour",
      "viewConclusion": "Voir la conclusion"
    },
    "repoPage": {
      "refreshWiki": "Rafraîchir le Wiki",
      "confirmRefresh": "Confirmer le rafraîchissement",
      "cancel": "Annuler",
      "home": "Accueil",
      "errorTitle": "Erreur",
      "errorMessageDefault": "Veuillez vérifier que votre dépôt existe et est public. Les formats valides sont \"propriétaire/dépôt\", \"https://github.com/propriétaire/dépôt\", \"https://gitlab.com/propriétaire/dépôt\", \"https://bitbucket.org/propriétaire/dépôt\" ou des chemins locaux comme \"C:\\\\chemin\\\\vers\\\\dossier\" ou \"/chemin/vers/dossier\".",
      "embeddingErrorDefault": "Cette erreur est liée au système d’indexation utilisé pour analyser votre dépôt. Veuillez vérifier la configuration du modèle d’indexation, les clés API, puis réessayez. Si le problème persiste, envisagez d’utiliser un autre fournisseur d’indexation dans les paramètres du modèle.",
      "backToHome": "Retour à l’accueil",
      "exportWiki": "Exporter le Wiki",
      "exportAsMarkdown": "Exporter en Markdown",
      "exportAsJson": "Exporter en JSON",
      "pages": "Pages",
      "relatedFiles": "Fichiers associés :",
      "relatedPages": "Pages associées :",
      "selectPagePrompt": "Sélectionnez une page dans la navigation pour en voir le contenu",
      "askAboutRepo": "Poser des questions sur ce dépôt"
    },
    "nav": {
      "wikiProjects": "Projets Wiki"
    },
    "projects": {
      "title": "Projets Wiki traités",
      "searchPlaceholder": "Rechercher des projets par nom, propriétaire ou dépôt...",
      "noProjects": "Aucun projet trouvé dans le cache du serveur. Le cache est peut-être vide ou le serveur a rencontré un problème.",
      "noSearchResults": "Aucun projet ne correspond à vos critères de recherche.",
      "processedOn": "Traité le :",
      "loadingProjects": "Chargement des projets...",
      "errorLoading": "Erreur lors du chargement des projets :",
      "backToHome": "Retour à l’accueil",
      "browseExisting": "Parcourir les projets existants",
      "existingProjects": "Projets existants",
      "recentProjects": "Projets récents"
    }
  }
  

================================================
FILE: src/messages/ja.json
================================================
{
  "common": {
    "appName": "DeepWiki-Open",
    "tagline": "AI駆動のドキュメンテーション",
    "generateWiki": "Wikiを生成",
    "processing": "処理中...",
    "error": "エラー",
    "submit": "送信",
    "cancel": "キャンセル",
    "close": "閉じる",
    "loading": "読み込み中..."
  },
  "loading": {
    "initializing": "Wiki生成を初期化中...",
    "fetchingStructure": "リポジトリ構造を取得中...",
    "determiningStructure": "Wiki構造を決定中...",
    "clearingCache": "サーバーキャッシュをクリア中...",
    "preparingDownload": "ダウンロードの準備中です..."
  },
  "home": {
    "welcome": "DeepWiki-Openへようこそ",
    "welcomeTagline": "コードリポジトリのためのAI駆動ドキュメンテーション",
    "description": "GitHub、GitLab、またはBitbucketリポジトリから包括的なドキュメントを数クリックで生成します。",
    "quickStart": "クイックスタート",
    "enterRepoUrl": "以下のいずれかの形式でリポジトリURLを入力してください：",
    "advancedVisualization": "Mermaidダイアグラムによる高度な可視化",
    "diagramDescription": "DeepWikiは、コード構造と関係を理解するのに役立つインタラクティブな図を自動的に生成します：",
    "flowDiagram": "フロー図",
    "sequenceDiagram": "シーケンス図"
  },
  "form": {
    "repository": "リポジトリ",
    "configureWiki": "Wiki設定",
    "repoPlaceholder": "所有者/リポジトリまたはGitHub/GitLab/BitbucketのURL",
    "wikiLanguage": "Wiki言語",
    "modelOptions": "モデルオプション",
    "modelProvider": "モデルプロバイダー",
    "modelSelection": "モデル選択",
    "wikiType": "Wikiタイプ",
    "comprehensive": "包括的",
    "concise": "簡潔",
    "comprehensiveDescription": "構造化されたセクションとより多くのページを持つ詳細なWiki",
    "conciseDescription": "ページ数が少なく、必要な情報のみを含む簡素化されたWiki",
    "providerGoogle": "Google",
    "providerOpenAI": "OpenAI",
    "providerOpenRouter": "OpenRouter",
    "providerOllama": "Ollama（ローカル）",
    "localOllama": "ローカルOllamaモデル",
    "experimental": "実験的",
    "useOpenRouter": "OpenRouter APIを使用",
    "openRouterModel": "OpenRouterモデル",
    "useOpenai": "OpenAI APIを使用",
    "openaiModel": "OpenAIモデル",
    "useCustomModel": "カスタムモデルを使用",
    "customModelPlaceholder": "カスタムモデル名を入力",
    "addTokens": "+ プライベートリポジトリ用のアクセストークンを追加",
    "hideTokens": "- アクセストークンを隠す",
    "accessToken": "プライベートリポジトリ用のアクセストークン",
    "selectPlatform": "プラットフォームを選択",
    "personalAccessToken": "{platform}個人アクセストークン",
    "tokenPlaceholder": "{platform}トークンを入力してください",
    "tokenSecurityNote": "トークンはメモリ内にのみ保存され、永続化されることはありません。",
    "defaultFiltersInfo": "デフォルトのフィルターは、node_modules、.git、および一般的なビルドアーティファクトファイルのような一般的なディレクトリを含みます。",
    "fileFilterTitle": "ファイルフィルター設定",
    "advancedOptions": "詳細オプション",
    "viewDefaults": "デフォルトフィルターを表示",
    "showFilters": "フィルターを表示",
    "hideFilters": "フィルターを非表示",
    "excludedDirs": "除外するディレクトリ",
    "excludedDirsHelp": "一行につき一つのディレクトリパス。./で始まるパスはリポジトリルートからの相対パスです。",
    "enterExcludedDirs": "除外するディレクトリを一行ずつ入力...",
    "excludedFiles": "除外するファイル",
    "excludedFilesHelp": "一行につき一つのファイル名。ワイルドカード(*)が使用可能です。",
    "enterExcludedFiles": "除外するファイルを一行ずつ入力...",
    "defaultFilters": "デフォルトで除外されるファイルとディレクトリ",
    "directories": "ディレクトリ",
    "files": "ファイル",
    "scrollToViewMore": "スクロールしてさらに表示",
    "changeModel": "モデルを変更",
    "defaultNote": "これらのデフォルト設定は既に適用されています。上記に追加の除外項目を入力してください。",
    "hideDefault": "デフォルトを隠す",
    "viewDefault": "デフォルトを表示",
    "authorizationCode": "認証コード",
    "authorizationRequired": "Wiki生成には認証コードが必要です"
  },
  "footer": {
    "copyright": "DeepWiki - コードリポジトリのためのAI駆動ドキュメンテーション"
  },
  "ask": {
    "placeholder": "このリポジトリについて質問する...",
    "askButton": "質問する",
    "deepResearch": "詳細調査",
    "researchInProgress": "調査進行中...",
    "continueResearch": "調査を続ける",
    "viewPlan": "計画を見る",
    "viewUpdates": "更新を見る",
    "viewConclusion": "結論を見る"
  },
  "repoPage": {
    "refreshWiki": "Wikiを更新",
    "confirmRefresh": "更新を確認",
    "cancel": "キャンセル",
    "home": "ホーム",
    "errorTitle": "エラー",
    "errorMessageDefault": "リポジトリが存在し、公開されていることを確認してください。有効な形式は「owner/repo」、「https://github.com/owner/repo」、「https://gitlab.com/owner/repo」、「https://bitbucket.org/owner/repo」、またはローカルフォルダパス（例: 「C:\\\\path\\\\to\\\\folder」、「/path/to/folder」）です。",
    "embeddingErrorDefault": "このエラーは、リポジトリを分析するために使用されるドキュメント埋め込みシステムに関連しています。モデル設定で異なる埋め込みプロバイダーを試してみてください。",
    "backToHome": "ホームに戻る",
    "exportWiki": "Wikiをエクスポート",
    "exportAsMarkdown": "Markdownとしてエクスポート",
    "exportAsJson": "JSONとしてエクスポート",
    "pages": "ページ",
    "relatedFiles": "関連ファイル:",
    "relatedPages": "関連ページ:",
    "selectPagePrompt": "ナビゲーションからページを選択してコンテンツを表示",
    "askAboutRepo": "このリポジトリについて質問する"
  },
  "nav": {
    "wikiProjects": "プロジェクト一覧"
  },
  "projects": {
    "title": "処理済みWikiプロジェクト",
    "searchPlaceholder": "プロジェクト名、所有者、リポジトリで検索...",
    "noProjects": "サーバーキャッシュにプロジェクトが見つかりません。キャッシュが空であるか、サーバーで問題が発生した可能性があります。",
    "noSearchResults": "検索条件に一致するプロジェクトがありません。",
    "processedOn": "処理日時:",
    "loadingProjects": "プロジェクトを読み込み中...",
    "errorLoading": "プロジェクトの読み込みエラー:",
    "backToHome": "ホームに戻る",
    "browseExisting": "既存プロジェクトを閲覧",
    "existingProjects": "既存プロジェクト",
    "recentProjects": "最近のプロジェクト"
  }
}


================================================
FILE: src/messages/kr.json
================================================
{
    "common": {
      "appName": "DeepWiki-Open",
      "tagline": "AI 기반 문서화",
      "generateWiki": "위키 생성",
      "processing": "처리 중...",
      "error": "오류",
      "submit": "제출",
      "cancel": "취소",
      "close": "닫기",
      "loading": "로딩 중..."
    },
    "loading": {
      "initializing": "위키 생성을 초기화하는 중...",
      "fetchingStructure": "저장소 구조를 가져오는 중...",
      "determiningStructure": "위키 구조를 결정하는 중...",
      "clearingCache": "서버 캐시를 지우는 중...",
      "preparingDownload": "다운로드를 준비 중입니다. 잠시만 기다려 주세요..."
    },
    "home": {
      "welcome": "DeepWiki-Open에 오신 것을 환영합니다",
      "welcomeTagline": "코드 저장소를 위한 AI 기반 문서화",
      "description": "GitHub, GitLab 또는 Bitbucket 저장소에서 클릭 한 번으로 종합 문서를 생성하세요.",
      "quickStart": "빠른 시작",
      "enterRepoUrl": "다음 형식 중 하나로 저장소 URL을 입력하세요:",
      "advancedVisualization": "Mermaid 다이어그램을 활용한 고급 시각화",
      "diagramDescription": "DeepWiki는 코드 구조와 관계를 이해하는 데 도움이 되는 대화형 다이어그램을 자동 생성합니다:",
      "flowDiagram": "흐름도",
      "sequenceDiagram": "시퀀스 다이어그램"
    },
    "form": {
      "repository": "저장소",
      "configureWiki": "위키 구성",
      "repoPlaceholder": "owner/repo 또는 GitHub/GitLab/Bitbucket URL",
      "wikiLanguage": "위키 언어",
      "modelOptions": "모델 옵션",
      "modelProvider": "모델 제공자",
      "modelSelection": "모델 선택",
      "wikiType": "위키 유형",
      "comprehensive": "종합적",
      "concise": "간결함",
      "comprehensiveDescription": "구조화된 섹션과 더 많은 페이지가 있는 상세한 위키",
      "conciseDescription": "페이지 수가 적고 필수 정보만 포함된 간소화된 위키",
      "providerGoogle": "구글",
      "providerOpenAI": "OpenAI",
      "providerOpenRouter": "OpenRouter",
      "providerOllama": "Ollama (로컬)",
      "localOllama": "로컬 Ollama 모델",
      "experimental": "실험적",
      "useOpenRouter": "OpenRouter API 사용",
      "openRouterModel": "OpenRouter 모델",
      "useOpenai": "OpenAI API 사용",
      "openaiModel": "OpenAI 모델",
      "useCustomModel": "사용자 정의 모델 사용",
      "customModelPlaceholder": "사용자 정의 모델 이름 입력",
      "addTokens": "+ 비공개 저장소 액세스 토큰 추가",
      "hideTokens": "- 액세스 토큰 숨기기",
      "accessToken": "비공개 저장소용 액세스 토큰",
      "selectPlatform": "플랫폼 선택",
      "personalAccessToken": "{platform} 개인 액세스 토큰",
      "tokenPlaceholder": "{platform} 토큰을 입력하세요",
      "tokenSecurityNote": "토큰은 메모리에만 저장되며, 영구적으로 보존되지 않습니다.",
      "defaultFiltersInfo": "기본 필터에는 node_modules, .git 및 일반적인 빌드 파일이 포함됩니다.",
      "fileFilterTitle": "파일 필터 구성",
      "advancedOptions": "고급 옵션",
      "viewDefaults": "기본 필터 보기",
      "showFilters": "필터 표시",
      "hideFilters": "필터 숨기기",
      "excludedDirs": "제외할 디렉토리",
      "excludedDirsHelp": "한 줄에 하나의 디렉토리 경로. ./로 시작하는 경로는 저장소 루트에서의 상대 경로입니다.",
      "enterExcludedDirs": "제외할 디렉토리를 한 줄에 하나씩 입력하세요...",
      "excludedFiles": "제외할 파일",
      "excludedFilesHelp": "한 줄에 하나의 파일 이름. 와일드카드(*)가 지원됩니다.",
      "enterExcludedFiles": "제외할 파일을 한 줄에 하나씩 입력하세요...",
      "defaultFilters": "기본적으로 제외되는 파일 및 디렉토리",
      "directories": "디렉토리",
      "files": "파일",
      "scrollToViewMore": "더 보려면 스크롤하세요",
      "changeModel": "모델 변경",
      "defaultNote": "이 기본 설정은 이미 적용되었습니다. 위에 사용자 지정 제외 항목을 추가하세요.",
      "hideDefault": "기본값 숨기기",
      "viewDefault": "기본값 보기",
      "authorizationCode": "인증코드",
      "authorizationRequired": "Wiki 생성에는 인증코드가 필요합니다"
    },
    "footer": {
      "copyright": "DeepWiki - 코드 저장소를 위한 AI 기반 문서화"
    },
    "ask": {
      "placeholder": "이 저장소에 대해 질문해 보세요...",
      "askButton": "질문하기",
      "deepResearch": "심층 분석",
      "researchInProgress": "심층 분석 진행 중...",
      "continueResearch": "분석 계속하기",
      "viewPlan": "계획 보기",
      "viewUpdates": "업데이트 보기",
      "viewConclusion": "결론 보기"
    },
    "repoPage": {
      "refreshWiki": "위키 새로고침",
      "confirmRefresh": "새로고침 확인",
      "cancel": "취소",
      "home": "홈",
      "errorTitle": "오류",
      "errorMessageDefault": "저장소가 존재하며 공개 상태인지 확인해 주세요. 유효한 형식은 \"owner/repo\", \"https://github.com/owner/repo\", \"https://gitlab.com/owner/repo\", \"https://bitbucket.org/owner/repo\" 또는 로컬 폴더 경로 \"C:\\\\path\\\\to\\\\folder\" 혹은 \"/path/to/folder\" 입니다.",
      "embeddingErrorDefault": "이 오류는 저장소를 분석하는 데 사용되는 문서 임베딩 시스템과 관련이 있습니다. 임베딩 모델 구성과 API 키를 확인한 후 다시 시도해 주세요. 문제가 지속되면 모델 설정에서 다른 임베딩 제공자로 변경해 보세요.",
      "backToHome": "홈으로 돌아가기",
      "exportWiki": "위키 내보내기",
      "exportAsMarkdown": "마크다운으로 내보내기",
      "exportAsJson": "JSON으로 내보내기",
      "pages": "페이지",
      "relatedFiles": "관련 파일:",
      "relatedPages": "관련 페이지:",
      "selectPagePrompt": "목록에서 페이지를 선택하여 내용을 확인하세요",
      "askAboutRepo": "이 저장소에 대해 질문하기"
    },
    "nav": {
      "wikiProjects": "프로젝트 목록"
    },
    "projects": {
      "title": "처리된 위키 프로젝트",
      "searchPlaceholder": "프로젝트 이름, 소유자 또는 저장소로 검색...",
      "noProjects": "서버 캐시에서 프로젝트를 찾을 수 없습니다. 캐시가 비어있거나 서버에 문제가 발생했을 수 있습니다.",
      "noSearchResults": "검색 조건에 맞는 프로젝트가 없습니다.",
      "processedOn": "처리 날짜:",
      "loadingProjects": "프로젝트 로딩 중...",
      "errorLoading": "프로젝트 로딩 오류:",
      "backToHome": "홈으로 돌아가기",
      "browseExisting": "기존 프로젝트 탐색",
      "existingProjects": "기존 프로젝트",
      "recentProjects": "최근 프로젝트"
    }
  }


================================================
FILE: src/messages/pt-br.json
================================================
{
  "common": {
    "appName": "DeepWiki-Open",
    "tagline": "Documentação com IA",
    "generateWiki": "Gerar Wiki",
    "processing": "Processando...",
    "error": "Erro",
    "submit": "Enviar",
    "cancel": "Cancelar",
    "close": "Fechar",
    "loading": "Carregando..."
  },
  "loading": {
    "initializing": "Inicializando geração da wiki...",
    "fetchingStructure": "Obtendo estrutura do repositório...",
    "determiningStructure": "Determinando estrutura da wiki...",
    "clearingCache": "Limpando cache do servidor...",
    "preparingDownload": "Aguarde enquanto preparamos seu download..."
  },
  "home": {
    "welcome": "Bem-vindo ao DeepWiki-Open",
    "welcomeTagline": "Documentação com IA para seus repositórios de código",
    "description": "Gere documentação completa a partir de repositórios GitHub, GitLab ou Bitbucket com apenas alguns cliques.",
    "quickStart": "Início Rápido",
    "enterRepoUrl": "Digite uma URL de repositório em um destes formatos:",
    "advancedVisualization": "Visualização Avançada com Diagramas Mermaid",
    "diagramDescription": "O DeepWiki gera automaticamente diagramas interativos para ajudar você a entender a estrutura e os relacionamentos do código:",
    "flowDiagram": "Diagrama de Fluxo",
    "sequenceDiagram": "Diagrama de Sequência"
  },
  "form": {
    "repository": "Repositório",
    "configureWiki": "Configurar Wiki",
    "repoPlaceholder": "proprietário/repo ou URL do GitHub/GitLab/Bitbucket",
    "wikiLanguage": "Idioma da Wiki",
    "modelOptions": "Opções de Modelo",
    "modelProvider": "Provedor de Modelo",
    "modelSelection": "Seleção de Modelo",
    "wikiType": "Tipo de Wiki",
    "comprehensive": "Abrangente",
    "concise": "Concisa",
    "comprehensiveDescription": "Wiki detalhada com seções estruturadas e mais páginas",
    "conciseDescription": "Wiki simplificada com menos páginas e informações essenciais",
    "providerGoogle": "Google",
    "providerOpenAI": "OpenAI",
    "providerOpenRouter": "OpenRouter",
    "providerOllama": "Ollama (Local)",
    "localOllama": "Modelo Ollama Local",
    "experimental": "Experimental",
    "useOpenRouter": "Usar API OpenRouter",
    "openRouterModel": "Modelo OpenRouter",
    "useOpenai": "Usar API OpenAI",
    "openaiModel": "Modelo OpenAI",
    "useCustomModel": "Usar modelo personalizado",
    "customModelPlaceholder": "Digite o nome do modelo personalizado",
    "addTokens": "+ Adicionar tokens de acesso para repositórios privados",
    "hideTokens": "- Ocultar tokens de acesso",
    "accessToken": "Token de Acesso para Repositórios Privados",
    "selectPlatform": "Selecionar Plataforma",
    "personalAccessToken": "Token de Acesso Pessoal do {platform}",
    "tokenPlaceholder": "Digite seu token do {platform}",
    "tokenSecurityNote": "O token é armazenado apenas na memória e nunca é persistido.",
    "defaultFiltersInfo": "Os filtros padrão incluem diretórios comuns como node_modules, .git e arquivos de artefatos de compilação comuns.",
    "fileFilterTitle": "Configuração de Filtro de Arquivos",
    "advancedOptions": "Opções Avançadas",
    "viewDefaults": "Ver Filtros Padrão",
    "showFilters": "Mostrar Filtros",
    "hideFilters": "Ocultar Filtros",
    "excludedDirs": "Diretórios a Excluir",
    "excludedDirsHelp": "Um caminho de diretório por linha. Caminhos começando com ./ são relativos à raiz do repositório.",
    "enterExcludedDirs": "Digite os diretórios excluídos, um por linha...",
    "excludedFiles": "Arquivos a Excluir",
    "excludedFilesHelp": "Um nome de arquivo por linha. Curingas (*) são suportados.",
    "enterExcludedFiles": "Digite os arquivos excluídos, um por linha...",
    "defaultFilters": "Arquivos e Diretórios Excluídos por Padrão",
    "directories": "Diretórios",
    "files": "Arquivos",
    "scrollToViewMore": "Role para ver mais",
    "changeModel": "Alterar Modelo",
    "defaultNote": "Esses padrões já estão aplicados. Adicione suas exclusões personalizadas acima.",
    "hideDefault": "Ocultar Padrão",
    "viewDefault": "Ver Padrão",
    "includedDirs": "Diretórios Incluídos",
    "includedFiles": "Arquivos Incluídos",
    "enterIncludedDirs": "Digite os diretórios incluídos, um por linha...",
    "enterIncludedFiles": "Digite os arquivos incluídos, um por linha...",
    "filterMode": "Modo de Filtro",
    "excludeMode": "Excluir Caminhos",
    "includeMode": "Incluir Apenas Caminhos",
    "excludeModeDescription": "Especificar caminhos a serem excluídos do processamento (comportamento padrão)",
    "includeModeDescription": "Especificar apenas os caminhos a serem incluídos, ignorando todos os outros",
    "authorizationCode": "Código de Autorização",
    "authorizationRequired": "A autenticação é necessária para gerar a wiki."
  },
  "footer": {
    "copyright": "DeepWiki - Documentação com IA para repositórios de código"
  },
  "ask": {
    "placeholder": "Faça uma pergunta sobre este repositório...",
    "askButton": "Perguntar",
    "deepResearch": "Pesquisa Aprofundada",
    "researchInProgress": "Pesquisa em andamento...",
    "continueResearch": "Continuar Pesquisa",
    "viewPlan": "Ver Plano",
    "viewUpdates": "Ver Atualizações",
    "viewConclusion": "Ver Conclusão"
  },
  "repoPage": {
    "refreshWiki": "Atualizar Wiki",
    "confirmRefresh": "Confirmar Atualização",
    "cancel": "Cancelar",
    "home": "Início",
    "errorTitle": "Erro",
    "errorMessageDefault": "Verifique se o seu repositório existe e é público. Formatos válidos são \"proprietário/repo\", \"https://github.com/proprietário/repo\", \"https://gitlab.com/proprietário/repo\", \"https://bitbucket.org/proprietário/repo\", ou caminhos de pastas locais como \"C:\\\\caminho\\\\para\\\\pasta\" ou \"/caminho/para/pasta\".",
    "embeddingErrorDefault": "Este erro está relacionado com o sistema de embebimento utilizado para analisar o seu repositório. Verifique a configuração do modelo de embebimento, as chaves de API e tente novamente. Se o problema persistir, considere mudar para um provedor de embebimento diferente na configuração do modelo.",
    "backToHome": "Voltar ao Início",
    "exportWiki": "Exportar Wiki",
    "exportAsMarkdown": "Exportar como Markdown",
    "exportAsJson": "Exportar como JSON",
    "pages": "Páginas",
    "relatedFiles": "Arquivos Relacionados:",
    "relatedPages": "Páginas Relacionadas:",
    "selectPagePrompt": "Selecione uma página da navegação para ver seu conteúdo",
    "askAboutRepo": "Faça perguntas sobre este repositório"
  },
  "nav": {
    "wikiProjects": "Projetos Wiki"
  },
  "projects": {
    "title": "Projetos Wiki Processados",
    "searchPlaceholder": "Pesquisar projetos por nome, proprietário ou repositório...",
    "noProjects": "Nenhum projeto encontrado no cache do servidor. O cache pode estar vazio ou o servidor encontrou um problema.",
    "noSearchResults": "Nenhum projeto corresponde aos seus critérios de pesquisa.",
    "processedOn": "Processado em:",
    "loadingProjects": "Carregando projetos...",
    "errorLoading": "Erro ao carregar projetos:",
    "backToHome": "Voltar ao Início",
    "browseExisting": "Navegar por Projetos Existentes",
    "existingProjects": "Projetos Existentes",
    "recentProjects": "Projetos Recentes"
  }
}


================================================
FILE: src/messages/ru.json
================================================
{
  "common": {
    "appName": "DeepWiki-Open",
    "tagline": "Документация с поддержкой ИИ",
    "generateWiki": "Создать Wiki",
    "processing": "Обработка...",
    "error": "Ошибка",
    "submit": "Отправить",
    "cancel": "Отмена",
    "close": "Закрыть",
    "loading": "Загрузка..."
  },
  "loading": {
    "initializing": "Инициализация генерации wiki...",
    "fetchingStructure": "Получение структуры репозитория...",
    "determiningStructure": "Определение структуры wiki...",
    "clearingCache": "Очистка кеша сервера...",
    "preparingDownload": "Пожалуйста, подождите, идет подготовка загрузки..."
  },
  "home": {
    "welcome": "Добро пожаловать в DeepWiki-Open",
    "welcomeTagline": "Документация с поддержкой ИИ для ваших репозиториев кода",
    "description": "Создавайте подробную документацию из репозиториев GitHub, GitLab или Bitbucket всего за несколько кликов.",
    "quickStart": "Быстрый старт",
    "enterRepoUrl": "Введите URL репозитория в одном из следующих форматов:",
    "advancedVisualization": "Продвинутая визуализация с диаграммами Mermaid",
    "diagramDescription": "DeepWiki автоматически генерирует интерактивные диаграммы, чтобы помочь вам понять структуру и связи в коде:",
    "flowDiagram": "Диаграмма потока",
    "sequenceDiagram": "Диаграмма последовательности"
  },
  "form": {
    "repository": "Репозиторий",
    "configureWiki": "Настроить Wiki",
    "repoPlaceholder": "owner/repo или URL GitHub/GitLab/Bitbucket",
    "wikiLanguage": "Язык Wiki",
    "modelOptions": "Настройки модели",
    "modelProvider": "Поставщик модели",
    "modelSelection": "Выбор модели",
    "wikiType": "Тип Wiki",
    "comprehensive": "Подробная",
    "concise": "Краткая",
    "comprehensiveDescription": "Детализированная Wiki со структурированными разделами и большим числом страниц",
    "conciseDescription": "Упрощённая Wiki с меньшим числом страниц и основной информацией",
    "providerGoogle": "Google",
    "providerOpenAI": "OpenAI",
    "providerOpenRouter": "OpenRouter",
    "providerOllama": "Ollama (локально)",
    "localOllama": "Локальная модель Ollama",
    "experimental": "Экспериментально",
    "useOpenRouter": "Использовать API OpenRouter",
    "openRouterModel": "Модель OpenRouter",
    "useOpenai": "Использовать API OpenAI",
    "openaiModel": "Модель OpenAI",
    "useCustomModel": "Использовать пользовательскую модель",
    "customModelPlaceholder": "Введите имя пользовательской модели",
    "addTokens": "+ Добавить токены доступа для приватных репозиториев",
    "hideTokens": "- Скрыть токены доступа",
    "accessToken": "Токен доступа для приватных репозиториев",
    "selectPlatform": "Выбрать платформу",
    "personalAccessToken": "Персональный токен доступа {platform}",
    "tokenPlaceholder": "Введите ваш токен {platform}",
    "tokenSecurityNote": "Токен хранится только в памяти и не сохраняется.",
    "defaultFiltersInfo": "Фильтры по умолчанию исключают общие директории, такие как node_modules, .git и артефакты сборки.",
    "fileFilterTitle": "Настройка фильтра файлов",
    "advancedOptions": "Дополнительные параметры",
    "viewDefaults": "Показать фильтры по умолчанию",
    "showFilters": "Показать фильтры",
    "hideFilters": "Скрыть фильтры",
    "excludedDirs": "Исключённые директории",
    "excludedDirsHelp": "Один путь к директории на строку. Пути, начинающиеся с ./, относительны к корню репозитория.",
    "enterExcludedDirs": "Введите исключённые директории, по одной на строку...",
    "excludedFiles": "Исключённые файлы",
    "excludedFilesHelp": "Один файл на строку. Поддерживаются подстановочные знаки (*).",
    "enterExcludedFiles": "Введите исключённые файлы, по одному на строку...",
    "defaultFilters": "Исключённые файлы и директории по умолчанию",
    "directories": "Директории",
    "files": "Файлы",
    "scrollToViewMore": "Прокрутите для просмотра",
    "changeModel": "Сменить модель",
    "defaultNote": "Эти значения уже применены. Добавьте свои исключения выше.",
    "hideDefault": "Скрыть по умолчанию",
    "viewDefault": "Показать по умолчанию",
    "includedDirs": "Включённые директории",
    "includedFiles": "Включённые файлы",
    "enterIncludedDirs": "Введите включённые директории, по одной на строку...",
    "enterIncludedFiles": "Введите включённые файлы, по одному на строку...",
    "filterMode": "Режим фильтрации",
    "excludeMode": "Исключить пути",
    "includeMode": "Включить только пути",
    "excludeModeDescription": "Укажите пути, которые нужно исключить из обработки (поведение по умолчанию)",
    "includeModeDescription": "Укажите только те пути, которые нужно включить, игнорируя остальные",
    "authorizationCode": "Код авторизации",
    "authorizationRequired": "Для генерации Wiki требуется авторизация."
  },
  "footer": {
    "copyright": "DeepWiki — документация с поддержкой ИИ для репозиториев кода"
  },
  "ask": {
    "placeholder": "Задайте вопрос об этом репозитории...",
    "askButton": "Спросить",
    "deepResearch": "Глубокое исследование",
    "researchInProgress": "Идёт исследование...",
    "continueResearch": "Продолжить исследование",
    "viewPlan": "Просмотреть план",
    "viewUpdates": "Просмотреть обновления",
    "viewConclusion": "Просмотреть выводы"
  },
  "repoPage": {
    "refreshWiki": "Обновить Wiki",
    "confirmRefresh": "Подтвердить обновление",
    "cancel": "Отмена",
    "home": "Главная",
    "errorTitle": "Ошибка",
    "errorMessageDefault": "Пожалуйста, убедитесь, что ваш репозиторий существует и является публичным. Допустимые форматы: \"owner/repo\", \"https://github.com/owner/repo\", \"https://gitlab.com/owner/repo\", \"https://bitbucket.org/owner/repo\" или локальные пути вроде \"C:\\\\path\\\\to\\\\folder\" или \"/path/to/folder\".",
    "embeddingErrorDefault": "Ошибка связана с системой встраивания документов для анализа репозитория. Проверьте конфигурацию модели встраивания, API-ключи и повторите попытку. Если проблема сохраняется, попробуйте сменить поставщика модели в настройках.",
    "backToHome": "Назад на главную",
    "exportWiki": "Экспортировать Wiki",
    "exportAsMarkdown": "Экспорт в Markdown",
    "exportAsJson": "Экспорт в JSON",
    "pages": "Страницы",
    "relatedFiles": "Связанные файлы:",
    "relatedPages": "Связанные страницы:",
    "selectPagePrompt": "Выберите страницу в навигации для просмотра её содержимого",
    "askAboutRepo": "Задайте вопросы об этом репозитории"
  },
  "nav": {
    "wikiProjects": "Проекты Wiki"
  },
  "projects": {
    "title": "Обработанные проекты Wiki",
    "searchPlaceholder": "Поиск проектов по названию, владельцу или репозиторию...",
    "noProjects": "На сервере не найдено проектов. Кеш может быть пуст или сервер столкнулся с проблемой.",
    "noSearchResults": "По вашему запросу проектов не найдено.",
    "processedOn": "Обработано:",
    "loadingProjects": "Загрузка проектов...",
    "errorLoading": "Ошибка загрузки проектов:",
    "backToHome": "Назад на главную",
    "browseExisting": "Просмотреть существующие проекты",
    "existingProjects": "Существующие проекты",
    "recentProjects": "Недавние проекты"
  }
}


================================================
FILE: src/messages/vi.json
================================================
{
  "common": {
    "appName": "DeepWiki-Open",
    "tagline": "Tài liệu hỗ trợ bởi AI",
    "generateWiki": "Tạo Wiki",
    "processing": "Đang xử lý...",
    "error": "Lỗi",
    "submit": "Gửi",
    "cancel": "Hủy",
    "close": "Đóng",
    "loading": "Đang tải..."
  },
  "loading": {
    "initializing": "Đang khởi tạo wiki...",
    "fetchingStructure": "Đang lấy cấu trúc repository...",
    "determiningStructure": "Đang xác định cấu trúc wiki...",
    "clearingCache": "Đang xóa bộ nhớ đệm máy chủ...",
    "preparingDownload": "Đang tải! Vui lòng chờ..."
  },
  "home": {
    "welcome": "Chào mừng đến với DeepWiki-Open",
    "welcomeTagline": "Tài liệu hỗ trợ bởi AI cho các repository của bạn",
    "description": "Tạo tài liệu từ các repository GitHub, GitLab, hoặc Bitbucket chỉ với vài cú nhấp chuột.",
    "quickStart": "Bắt đầu nhanh",
    "enterRepoUrl": "Nhập URL repository",
    "advancedVisualization": "Tùy chỉnh sơ đồ trực quan với Mermaid",
    "diagramDescription": "DeepWiki tự động tạo các sơ đồ tương tác giúp bạn hiểu cấu trúc source codes và mối quan hệ giữa chúng:",
    "flowDiagram": "Sơ đồ luồng",
    "sequenceDiagram": "Sơ đồ tuần tự"
  },
  "form": {
    "repository": "Repository",
    "configureWiki": "Cấu hình Wiki",
    "repoPlaceholder": "owner/repo hoặc URL GitHub/GitLab/Bitbucket",
    "wikiLanguage": "Ngôn ngữ Wiki",
    "modelOptions": "Tùy chọn mô hình",
    "modelProvider": "Nhà cung cấp mô hình",
    "modelSelection": "Lựa chọn mô hình",
    "wikiType": "Loại Wiki",
    "comprehensive": "Toàn diện",
    "concise": "Súc tích",
    "comprehensiveDescription": "Wiki chi tiết với các phần có cấu trúc và nhiều trang hơn",
    "conciseDescription": "Wiki đơn giản hóa với ít trang hơn và thông tin thiết yếu",
    "providerGoogle": "Google",
    "providerOpenAI": "OpenAI",
    "providerOpenRouter": "OpenRouter",
    "providerOllama": "Ollama (Cục bộ)",
    "localOllama": "Mô hình Ollama cục bộ",
    "experimental": "Thử nghiệm",
    "useOpenRouter": "Sử dụng API OpenRouter",
    "openRouterModel": "Mô hình OpenRouter",
    "useOpenai": "Sử dụng API OpenAI",
    "openaiModel": "Mô hình OpenAI",
    "useCustomModel": "Sử dụng mô hình tùy chỉnh",
    "customModelPlaceholder": "Nhập tên mô hình tùy chỉnh",
    "addTokens": "+ Thêm token truy cập cho private repositories",
    "hideTokens": "- Ẩn token truy cập",
    "accessToken": "Token truy cập cho private repositories",
    "selectPlatform": "Chọn nền tảng",
    "personalAccessToken": "Token truy cập cá nhân {platform}",
    "tokenPlaceholder": "Nhập token {platform} của bạn",
    "tokenSecurityNote": "Token chỉ được lưu trong bộ nhớ và không bao giờ được lưu trữ vĩnh viễn.",
    "defaultFiltersInfo": "Lọc mặc định bao gồm các thư mục thông thường như node_modules, .git và các tệp tài liệu xây dựng thông thường.",
    "fileFilterTitle": "Cấu hình Lọc Tệp",
    "advancedOptions": "Tùy chọn nâng cao",
    "viewDefaults": "Xem Lọc Mặc định",
    "showFilters": "Hiển thị Lọc",
    "hideFilters": "Ẩn Lọc",
    "excludedDirs": "Thư mục để Loại trừ",
    "excludedDirsHelp": "Một đường dẫn thư mục trên một dòng. Đường dẫn bắt đầu bằng ./ là tương đối so với gốc kho lưu trữ.",
    "enterExcludedDirs": "Nhập thư mục cần loại trừ, mỗi dòng một thư mục...",
    "excludedFiles": "Tệp để Loại trừ",
    "excludedFilesHelp": "Một tên tệp trên một dòng. Hỗ trợ ký tự đại diện (*).",
    "enterExcludedFiles": "Nhập tệp cần loại trừ, mỗi dòng một tệp...",
    "defaultFilters": "Tệp và Thư mục Loại trừ Mặc định",
    "directories": "Thư mục",
    "files": "Tệp",
    "scrollToViewMore": "Dịch chuyển để xem thêm",
    "changeModel": "Thay đổi mô hình",
    "defaultNote": "Các giá trị mặc định này đã được áp dụng. Thêm các loại trừ tùy chỉnh của bạn ở trên.",
    "hideDefault": "Ẩn mặc định",
    "viewDefault": "Xem mặc định",
    "authorizationCode": "Mã xác thực",
    "authorizationRequired": "Mã xác thực cần thiết để tạo Wiki"
  },
  "footer": {
    "copyright": "DeepWiki - Tài liệu hỗ trợ bởi AI cho repository"
  },
  "ask": {
    "placeholder": "Đặt một câu hỏi về repository này...",
    "askButton": "Hỏi",
    "deepResearch": "Nghiên cứu sâu",
    "researchInProgress": "Đang tiến hành nghiên cứu...",
    "continueResearch": "Tiếp tục nghiên cứu",
    "viewPlan": "Xem kế hoạch",
    "viewUpdates": "Xem cập nhật",
    "viewConclusion": "Xem kết luận"
  },
  "repoPage": {
    "refreshWiki": "Làm mới Wiki",
    "confirmRefresh": "Xác nhận làm mới",
    "cancel": "Hủy bỏ",
    "home": "Trang chủ",
    "errorTitle": "Lỗi",
    "errorMessageDefault": "Vui lòng kiểm tra xem repository có tồn tại và công khai hay không. Các định dạng hợp lệ là \"owner/repo\", \"https://github.com/owner/repo\", \"https://gitlab.com/owner/repo\", \"https://bitbucket.org/owner/repo\", hoặc các đường dẫn thư mục cục bộ như \"C:\\\\path\\\\to\\\\folder\" hoặc \"/path/to/folder\".",
    "embeddingErrorDefault": "Lỗi này liên quan đến hệ thống embedding được sử dụng để phân tích repository của bạn. Vui lòng kiểm tra cấu hình mô hình embedding, API keys và thử lại. Nếu vấn đề vẫn tiếp diễn, hãy xem xét chuyển sang nhà cung cấp embedding khác trong cấu hình mô hình.",
    "backToHome": "Quay lại trang chủ",
    "exportWiki": "Xuất Wiki",
    "exportAsMarkdown": "Xuất dưới dạng Markdown",
    "exportAsJson": "Xuất dưới dạng JSON",
    "pages": "Trang",
    "relatedFiles": "Tệp liên quan:",
    "relatedPages": "Trang liên quan:",
    "selectPagePrompt": "Chọn một trang từ thanh điều hướng để xem nội dung của nó",
    "askAboutRepo": "Hỏi về repository này"
  },
  "nav": {
    "wikiProjects": "Danh sách dự án"
  },
  "projects": {
    "title": "Dự án Wiki đã xử lý",
    "searchPlaceholder": "Tìm kiếm dự án theo tên, chủ sở hữu hoặc repository...",
    "noProjects": "Không tìm thấy dự án nào trong bộ nhớ đệm máy chủ. Bộ nhớ đệm có thể trống hoặc máy chủ gặp sự cố.",
    "noSearchResults": "Không có dự án nào phù hợp với tiêu chí tìm kiếm của bạn.",
    "processedOn": "Xử lý vào:",
    "loadingProjects": "Đang tải dự án...",
    "errorLoading": "Lỗi khi tải dự án:",
    "backToHome": "Về trang chủ",
    "browseExisting": "Duyệt dự án hiện có",
    "existingProjects": "Dự án hiện có",
    "recentProjects": "Dự án gần đây"
  }
}

================================================
FILE: src/messages/zh-tw.json
================================================
{
  "common": {
    "appName": "DeepWiki-Open",
    "tagline": "AI 驅動的文件",
    "generateWiki": "產生 Wiki",
    "processing": "處理中...",
    "error": "錯誤",
    "submit": "提交",
    "cancel": "取消",
    "close": "關閉",
    "loading": "載入中..."
  },
  "loading": {
    "initializing": "初始化 Wiki 產生...",
    "fetchingStructure": "取得儲存庫結構...",
    "determiningStructure": "驗證 Wiki 結構...",
    "clearingCache": "清除伺服器快取...",
    "preparingDownload": "請稍候，我們正在準備您的下載..."
  },
  "home": {
    "welcome": "歡迎使用 DeepWiki",
    "welcomeTagline": "為程式碼儲存庫提供 AI 驅動的文件",
    "description": "只需一次點擊，即可從 GitHub、GitLab 或 Bitbucket 儲存庫產生全面的文件。",
    "quickStart": "快速開始",
    "enterRepoUrl": "請以下列格式之一輸入儲存庫 URL：",
    "advancedVisualization": "使用 Mermaid 圖表進行進階視覺化",
    "diagramDescription": "DeepWiki 自動產生互動式圖表，協助您理解程式碼結構和關係：",
    "flowDiagram": "流程圖",
    "sequenceDiagram": "序列圖"
  },
  "form": {
    "repository": "儲存庫",
    "configureWiki": "設定 Wiki",
    "repoPlaceholder": "擁有者/儲存庫或 GitHub/GitLab/Bitbucket URL",
    "wikiLanguage": "Wiki 語言",
    "modelOptions": "模型選項",
    "modelProvider": "模型提供商",
    "modelSelection": "模型選擇",
    "wikiType": "Wiki 類型",
    "comprehensive": "全面型",
    "concise": "簡潔型",
    "comprehensiveDescription": "包含結構化章節和更多頁面的詳細 Wiki",
    "conciseDescription": "頁面更少，僅包含核心資訊的簡化 Wiki",
    "providerGoogle": "Google",
    "providerOpenAI": "OpenAI",
    "providerOpenRouter": "OpenRouter",
    "providerOllama": "Ollama（本機）",
    "localOllama": "本機 Ollama 模型",
    "experimental": "實驗性",
    "useOpenRouter": "使用 OpenRouter API",
    "openRouterModel": "OpenRouter 模型",
    "useOpenai": "使用 OpenAI API",
    "openaiModel": "OpenAI 模型",
    "useCustomModel": "使用自訂模型",
    "customModelPlaceholder": "輸入自訂模型名稱",
    "addTokens": "+ 新增私人儲存庫存取權杖",
    "hideTokens": "- 隱藏存取權杖",
    "accessToken": "私人儲存庫存取權杖",
    "selectPlatform": "選擇平台",
    "personalAccessToken": "{platform} 個人存取權杖",
    "tokenPlaceholder": "輸入您的 {platform} 權杖",
    "tokenSecurityNote": "權杖僅儲存在記憶體中，絕不會持久化。",
    "defaultFiltersInfo": "預設過濾器包括 node_modules、.git 和常見的建置檔案。",
    "fileFilterTitle": "檔案過濾設定",
    "advancedOptions": "進階選項",
    "viewDefaults": "檢視預設過濾",
    "showFilters": "顯示過濾器",
    "hideFilters": "隱藏過濾器",
    "excludedDirs": "要排除的目錄",
    "excludedDirsHelp": "每行一個目錄路徑。以 ./ 開頭表示相對於儲存庫根目錄的路徑。",
    "enterExcludedDirs": "輸入要排除的目錄，每行一個...",
    "excludedFiles": "要排除的檔案",
    "excludedFilesHelp": "每行一個檔案名稱。支援萬用字元（*）。",
    "enterExcludedFiles": "輸入要排除的檔案，每行一個...",
    "defaultFilters": "預設排除的檔案和目錄",
    "directories": "目錄",
    "files": "檔案",
    "scrollToViewMore": "可滑動檢視更多",
    "changeModel": "修改模型",
    "defaultNote": "這些預設設定已經被套用。請在上方新增您的自訂排除項目。",
    "hideDefault": "隱藏預設設定",
    "viewDefault": "檢視預設設定"
  },
  "footer": {
    "copyright": "DeepWiki - 為程式碼儲存庫提供 AI 驅動的文件"
  },
  "ask": {
    "placeholder": "詢問關於此儲存庫的問題...",
    "askButton": "提問",
    "deepResearch": "深度研究",
    "researchInProgress": "研究進行中...",
    "continueResearch": "繼續研究",
    "viewPlan": "檢視計畫",
    "viewUpdates": "檢視更新",
    "viewConclusion": "檢視結論"
  },
  "repoPage": {
    "refreshWiki": "重新整理 Wiki",
    "confirmRefresh": "確認重新整理",
    "cancel": "取消",
    "home": "首頁",
    "errorTitle": "錯誤",
    "errorMessageDefault": "請檢查您的儲存庫是否存在且為公開儲存庫。有效格式為 \"owner/repo\"、\"https://github.com/owner/repo\"、\"https://gitlab.com/owner/repo\"、\"https://bitbucket.org/owner/repo\"，或本機資料夾路徑，如 \"C:\\\\path\\\\to\\\\folder\" 或 \"/path/to/folder\"。",
    "embeddingErrorDefault": "這個錯誤與用於分析您的儲存庫的文件嵌入系統有關。請檢查您的嵌入模型配置、API 密鑰，並重試。如果問題持續存在，請考慮在模型設置中切換到不同的嵌入提供者。",
    "backToHome": "返回首頁",
    "exportWiki": "匯出 Wiki",
    "exportAsMarkdown": "匯出為 Markdown",
    "exportAsJson": "匯出為 JSON",
    "pages": "頁面",
    "relatedFiles": "相關檔案：",
    "relatedPages": "相關頁面：",
    "selectPagePrompt": "從導覽中選擇一個頁面以檢視其內容",
    "askAboutRepo": "詢問關於此儲存庫的問題"
  },
  "nav": {
    "wikiProjects": "專案清單"
  },
  "projects": {
    "title": "已處理的 Wiki 專案",
    "searchPlaceholder": "按專案名稱、擁有者或儲存庫搜尋...",
    "noProjects": "伺服器快取中未找到專案。快取可能為空或伺服器遇到問題。",
    "noSearchResults": "沒有專案符合您的搜尋條件。",
    "processedOn": "處理時間：",
    "loadingProjects": "正在載入專案...",
    "errorLoading": "載入專案時發生錯誤：",
    "backToHome": "返回首頁",
    "browseExisting": "瀏覽現有專案",
    "existingProjects": "現有專案",
    "recentProjects": "最近專案"
  }
} 

================================================
FILE: src/messages/zh.json
================================================
{
  "common": {
    "appName": "DeepWiki-Open",
    "tagline": "AI驱动的文档",
    "generateWiki": "生成Wiki",
    "processing": "处理中...",
    "error": "错误",
    "submit": "提交",
    "cancel": "取消",
    "close": "关闭",
    "loading": "加载中..."
  },
  "loading": {
    "initializing": "初始化Wiki生成...",
    "fetchingStructure": "获取仓库结构...",
    "determiningStructure": "验证Wiki结构...",
    "clearingCache": "清除服务器缓存...",
    "preparingDownload": "请等待，我们正在准备您的下载..."
  },
  "home": {
    "welcome": "欢迎使用DeepWiki",
    "welcomeTagline": "为代码仓库提供AI驱动的文档",
    "description": "只需一次点击，即可从GitHub、GitLab或Bitbucket仓库生成全面的文档。",
    "quickStart": "快速开始",
    "enterRepoUrl": "请以下列格式之一输入仓库URL：",
    "advancedVisualization": "使用Mermaid图表进行高级可视化",
    "diagramDescription": "DeepWiki自动生成交互式图表，帮助您理解代码结构和关系：",
    "flowDiagram": "流程图",
    "sequenceDiagram": "序列图"
  },
  "form": {
    "repository": "仓库",
    "configureWiki": "配置Wiki",
    "repoPlaceholder": "所有者/仓库或GitHub/GitLab/Bitbucket URL",
    "wikiLanguage": "Wiki语言",
    "modelOptions": "模型选项",
    "modelProvider": "模型提供商",
    "modelSelection": "模型选择",
    "wikiType": "Wiki类型",
    "comprehensive": "全面型",
    "concise": "简洁型",
    "comprehensiveDescription": "包含结构化章节和更多页面的详细Wiki",
    "conciseDescription": "页面更少，仅包含核心信息的简化Wiki",
    "providerGoogle": "Google",
    "providerOpenAI": "OpenAI",
    "providerOpenRouter": "OpenRouter",
    "providerOllama": "Ollama (本地)",
    "localOllama": "本地Ollama模型",
    "experimental": "实验性",
    "useOpenRouter": "使用OpenRouter API",
    "openRouterModel": "OpenRouter模型",
    "useOpenai": "使用OpenAI API",
    "openaiModel": "OpenAI模型",
    "useCustomModel": "使用自定义模型",
    "customModelPlaceholder": "输入自定义模型名称",
    "addTokens": "+ 添加私有仓库访问令牌",
    "hideTokens": "- 隐藏访问令牌",
    "accessToken": "私有仓库访问令牌",
    "selectPlatform": "选择平台",
    "personalAccessToken": "{platform}个人访问令牌",
    "tokenPlaceholder": "输入您的{platform}令牌",
    "tokenSecurityNote": "令牌仅存储在内存中，从不持久化。",
    "defaultFiltersInfo": "默认过滤器包括node_modules、.git和常见的构建文件。",
    "fileFilterTitle": "文件过滤配置",
    "advancedOptions": "高级选项",
    "viewDefaults": "查看默认过滤",
    "showFilters": "显示过滤器",
    "hideFilters": "隐藏过滤器",
    "excludedDirs": "要排除的目录",
    "excludedDirsHelp": "每行一个目录路径。以./开头表示相对于仓库根目录的路径。",
    "enterExcludedDirs": "输入要排除的目录，每行一个...",
    "excludedFiles": "要排除的文件",
    "excludedFilesHelp": "每行一个文件名。支持通配符(*)。",
    "enterExcludedFiles": "输入要排除的文件，每行一个...",
    "defaultFilters": "默认排除的文件和目录",
    "directories": "目录",
    "files": "文件",
    "scrollToViewMore": "可滑动查看更多",
    "changeModel": "修改模型",
    "defaultNote": "这些默认配置已经被应用。请在上方添加您的自定义排除项。",
    "hideDefault": "隐藏默认配置",
    "viewDefault": "查看默认配置",
    "authorizationCode": "授权码",
    "authorizationRequired": "生成Wiki页面需要填写授权码"
  },
  "footer": {
    "copyright": "DeepWiki - 为代码仓库提供AI驱动的文档"
  },
  "ask": {
    "placeholder": "询问关于此仓库的问题...",
    "askButton": "提问",
    "deepResearch": "深度研究",
    "researchInProgress": "研究进行中...",
    "continueResearch": "继续研究",
    "viewPlan": "查看计划",
    "viewUpdates": "查看更新",
    "viewConclusion": "查看结论"
  },
  "repoPage": {
    "refreshWiki": "刷新Wiki",
    "confirmRefresh": "确认刷新",
    "cancel": "取消",
    "home": "首页",
    "errorTitle": "错误",
    "errorMessageDefault": "请检查您的仓库是否存在且为公开仓库。有效格式为\"owner/repo\", \"https://github.com/owner/repo\", \"https://gitlab.com/owner/repo\", \"https://bitbucket.org/owner/repo\", 或本地文件夹路径，如\"C:\\\\path\\\\to\\\\folder\"或\"/path/to/folder\"。",
    "embeddingErrorDefault": "这个错误与用于分析您的仓库的文件嵌入系统有关。请检查您的嵌入模型配置、API 密钥，并重试。如果问题持续存在，请考虑在模型设置中切换到不同的嵌入提供者。",
    "backToHome": "返回首页",
    "exportWiki": "导出Wiki",
    "exportAsMarkdown": "导出为Markdown",
    "exportAsJson": "导出为JSON",
    "pages": "页面",
    "relatedFiles": "相关文件：",
    "relatedPages": "相关页面：",
    "selectPagePrompt": "从导航中选择一个页面以查看其内容",
    "askAboutRepo": "询问关于此仓库的问题"
  },
  "nav": {
    "wikiProjects": "项目列表"
  },
  "projects": {
    "title": "已处理的Wiki项目",
    "searchPlaceholder": "按项目名称、所有者或仓库搜索...",
    "noProjects": "服务器缓存中未找到项目。缓存可能为空或服务器遇到问题。",
    "noSearchResults": "没有项目符合您的搜索条件。",
    "processedOn": "处理时间:",
    "loadingProjects": "正在加载项目...",
    "errorLoading": "加载项目时出错:",
    "backToHome": "返回首页",
    "browseExisting": "浏览现有项目",
    "existingProjects": "现有项目",
    "recentProjects": "最近项目"
  }
}


================================================
FILE: src/types/repoinfo.tsx
================================================
/**
 * Descriptor of the repository a wiki is generated for.
 *
 * Every field is required; the nullable ones must be set to `null`
 * explicitly when they do not apply.
 */
export interface RepoInfo {
    // Owner segment of an "owner/repo" pair (presumably a user or organization — TODO confirm)
    owner: string;
    // Repository name segment of an "owner/repo" pair
    repo: string;
    // Repository kind; getRepoUrl compares it against 'local'. Other accepted values are not visible here — TODO confirm
    type: string;
    // NOTE(review): presumably an access token for private repositories — treat as a secret; confirm against callers
    token: string | null;
    // Filesystem path of the repository; getRepoUrl returns it when `type` is 'local'
    localPath: string | null;
    // Full repository URL, when known; getRepoUrl prefers it over owner/repo
    repoUrl: string | null;
}

export default RepoInfo;

================================================
FILE: src/types/wiki/wikipage.tsx
================================================
// Wiki Interfaces

/**
 * A single page of a wiki, optionally placed in a page hierarchy
 * through `parentId` / `children`.
 */
export interface WikiPage {
  // Identifier of the page; other pages refer to it by this value (see `children`)
  id: string;
  // Title of the page
  title: string;
  // Body of the page (presumably Markdown — TODO confirm against the renderer)
  content: string;
  // Paths of the files associated with this page (presumably repository-relative — TODO confirm)
  filePaths: string[];
  // Importance level of the page; restricted to these three literals
  importance: 'high' | 'medium' | 'low';
  // Related pages (presumably page IDs, like `children` — TODO confirm)
  relatedPages: string[];
  // Hierarchy fields — all optional
  // ID of the parent page, if any
  parentId?: string;
  // NOTE(review): presumably true when this entry is a section grouping other pages — confirm
  isSection?: boolean;
  children?: string[]; // IDs of child pages
}

================================================
FILE: src/types/wiki/wikistructure.tsx
================================================
import { WikiPage } from "./wikipage";

/**
 * @fileoverview Defines the top-level structure of a wiki: its own
 * metadata plus the list of pages it contains.
 */
export interface WikiStructure {
    // Identifier of the wiki
    id: string;
    // Title of the wiki
    title: string;
    // Description of the wiki
    description: string;
    // Pages of the wiki as a flat array; any hierarchy is expressed by the pages' own fields
    pages: WikiPage[];
}

================================================
FILE: src/utils/getRepoUrl.tsx
================================================
import RepoInfo from "@/types/repoinfo";

/**
 * Resolves the location string that identifies a repository.
 *
 * Resolution order:
 *  1. `localPath`, when `type` is `'local'` and a path is set;
 *  2. `repoUrl`, when one is set;
 *  3. a placeholder URL built from `owner` and `repo`;
 *  4. an empty string when none of the above is available.
 *
 * @param repoInfo Repository descriptor to resolve.
 * @returns The resolved path or URL, or `''` when nothing can be derived.
 */
export default function getRepoUrl(repoInfo: RepoInfo): string {
  // Deliberately no logging of `repoInfo`: the object carries `token`,
  // which must not end up in the console.
  if (repoInfo.type === 'local' && repoInfo.localPath) {
    return repoInfo.localPath;
  }

  if (repoInfo.repoUrl) {
    return repoInfo.repoUrl;
  }

  if (repoInfo.owner && repoInfo.repo) {
    // NOTE(review): "http://example/" is a placeholder host, not a real
    // hosting service — confirm callers only rely on the owner/repo part.
    return "http://example/" + repoInfo.owner + "/" + repoInfo.repo;
  }

  return '';
}

================================================
FILE: src/utils/urlDecoder.tsx
================================================
/**
 * Extracts the protocol, hostname and (if present) port of a URL-like string.
 *
 * Input that does not start with an explicit `http://` or `https://` scheme
 * is interpreted as `https://<input>`.
 *
 * @param input URL or host/path string, e.g. `"gitlab.example.com:8443/group/repo"`.
 * @returns e.g. `"https://gitlab.example.com:8443"`, or `null` when the input
 *          cannot be parsed as a URL.
 */
export function extractUrlDomain(input: string): string | null {
    try {
        // Test for a real scheme prefix: a bare startsWith('http') also matches
        // scheme-less hosts such as "httpbin.org", which then fail to parse.
        const hasHttpScheme = /^https?:\/\//i.test(input);
        const normalizedInput = hasHttpScheme ? input : `https://${input}`;
        const url = new URL(normalizedInput);
        return `${url.protocol}//${url.hostname}${url.port ? ':' + url.port : ''}`; // Includes the protocol and the domain
    } catch {
        return null; // Not a valid URL
    }
}

/**
 * Extracts the path of a URL-like string, without its leading and trailing slash.
 *
 * Input that does not start with an explicit `http://` or `https://` scheme
 * is interpreted as `https://<input>`.
 *
 * @param input URL or host/path string, e.g. `"github.com/owner/repo/"`.
 * @returns e.g. `"owner/repo"` (an empty string when there is no path), or
 *          `null` when the input cannot be parsed as a URL.
 */
export function extractUrlPath(input: string): string | null {
    try {
        // Test for a real scheme prefix: a bare startsWith('http') also matches
        // scheme-less hosts such as "httpbin.org", which then fail to parse.
        const hasHttpScheme = /^https?:\/\//i.test(input);
        const normalizedInput = hasHttpScheme ? input : `https://${input}`;
        const url = new URL(normalizedInput);
        return url.pathname.replace(/^\/|\/$/g, ''); // Remove one leading and one trailing slash
    } catch {
        return null; // Not a valid URL
    }
}

================================================
FILE: src/utils/websocketClient.ts
================================================
/**
 * WebSocket client for chat completions
 * This replaces the HTTP streaming endpoint with a WebSocket connection
 */

// Backend base URL: read from the SERVER_BASE_URL environment variable,
// with a local default when it is unset or empty
const SERVER_BASE_URL = process.env.SERVER_BASE_URL || 'http://localhost:8001';

// Derive the chat WebSocket endpoint from the HTTP base URL.
// Swapping the leading "http" for "ws" turns http:// into ws:// and https:// into wss://.
const getWebSocketUrl = () => `${SERVER_BASE_URL.replace(/^http/, 'ws')}/ws/chat`;

/**
 * One message of a chat conversation.
 */
export interface ChatMessage {
  // Author of the message; restricted to these three literals
  role: 'user' | 'assistant' | 'system';
  // Text of the message
  content: string;
}

/**
 * Request payload for a chat completion.
 *
 * createChatWebSocket serializes this object with JSON.stringify and sends it
 * as-is, so the property names (including the mixed snake_case / camelCase)
 * are part of the wire format.
 */
export interface ChatCompletionRequest {
  // Repository the conversation is about
  repo_url: string;
  // Conversation so far
  messages: ChatMessage[];
  // Optional path of a file (presumably inside the repository, to focus the answer on — TODO confirm)
  filePath?: string;
  // NOTE(review): presumably an access token for private repositories — confirm against the server
  token?: string;
  // NOTE(review): presumably the repository type (hosting platform or local) — confirm
  type?: string;
  // Optional model provider identifier
  provider?: string;
  // Optional model identifier
  model?: string;
  // Optional language (presumably of the generated answer — TODO confirm)
  language?: string;
  // Excluded directories, passed as a single string (delimiter not visible here — TODO confirm)
  excluded_dirs?: string;
  // Excluded files, passed as a single string (delimiter not visible here — TODO confirm)
  excluded_files?: string;
}

/**
 * Opens a WebSocket for a chat completion and wires the given callbacks to it.
 *
 * Once the connection is open, `request` is serialized to JSON and sent as a
 * single message; every message received afterwards is handed to `onMessage`.
 *
 * @param request The chat completion request to send on open
 * @param onMessage Invoked with the data of each received message
 * @param onError Invoked with the error event when the socket reports an error
 * @param onClose Invoked when the connection closes
 * @returns The WebSocket connection
 */
export const createChatWebSocket = (
  request: ChatCompletionRequest,
  onMessage: (message: string) => void,
  onError: (error: Event) => void,
  onClose: () => void
): WebSocket => {
  const socket = new WebSocket(getWebSocketUrl());

  // The request can only be sent after the handshake has completed
  const sendRequestOnOpen = () => {
    console.log('WebSocket connection established');
    socket.send(JSON.stringify(request));
  };

  // Forward the raw payload of each incoming message
  const forwardMessage = (event: MessageEvent) => {
    onMessage(event.data);
  };

  // Log first, then let the caller react
  const reportError = (error: Event) => {
    console.error('WebSocket error:', error);
    onError(error);
  };

  const notifyClosed = () => {
    console.log('WebSocket connection closed');
    onClose();
  };

  socket.onopen = sendRequestOnOpen;
  socket.onmessage = forwardMessage;
  socket.onerror = reportError;
  socket.onclose = notifyClosed;

  return socket;
};

/**
 * Closes a WebSocket connection that is open or still being established.
 *
 * A socket in the CONNECTING state is closed as well; otherwise it would go
 * on to open after the caller asked for it to be closed. Sockets that are
 * already CLOSING or CLOSED, and `null`, are left alone.
 *
 * @param ws The WebSocket connection to close, or `null` for a no-op
 */
export const closeWebSocket = (ws: WebSocket | null): void => {
  if (!ws) {
    return;
  }

  const state = ws.readyState;
  if (state === WebSocket.OPEN || state === WebSocket.CONNECTING) {
    ws.close();
  }
};


================================================
FILE: tailwind.config.js
================================================
// Tailwind CSS configuration, exported as a CommonJS module.
module.exports = {
  // NOTE(review): 'selector' presumably ties dark mode to a selector on the page
  // rather than to the OS colour-scheme preference — confirm against the Tailwind docs
  darkMode: 'selector',
  // Source globs: JS/TS/JSX/TSX/MDX files under src/pages, src/components and src/app
  content: [
    './src/pages/**/*.{js,ts,jsx,tsx,mdx}',
    './src/components/**/*.{js,ts,jsx,tsx,mdx}',
    './src/app/**/*.{js,ts,jsx,tsx,mdx}',
  ],
}

================================================
FILE: test/__init__.py
================================================
# Test package for deepwiki-open data pipeline


================================================
FILE: test/test_extract_repo_name.py
================================================
#!/usr/bin/env python3
"""
Focused test script for the _extract_repo_name_from_url method

Run this script to test only the repository name extraction functionality.
Usage: python test_extract_repo_name.py
"""

import pytest
import os
import sys
from unittest.mock import Mock, patch

# Add the parent directory to the path to import the data_pipeline module
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))

# Import the modules under test
from api.data_pipeline import DatabaseManager


class TestExtractRepoNameFromUrl:
    """Comprehensive tests for the _extract_repo_name_from_url method"""
    
    def setup_method(self):
        """Set up test fixtures before each test method."""
        self.db_manager = DatabaseManager()
    
    def test_extract_repo_name_github_standard_url(self):
        
        # Test standard GitHub URL
        github_url = "https://github.com/owner/repo"
        result = self.db_manager._extract_repo_name_from_url(github_url, "github")
        assert result == "owner_repo"
        
        # Test GitHub URL with .git suffix
        github_url_git = "https://github.com/owner/repo.git"
        result = self.db_manager._extract_repo_name_from_url(github_url_git, "github")
        assert result == "owner_repo"

        # Test GitHub URL with trailing slash
        github_url_slash = "https://github.com/owner/repo/"
        result = self.db_manager._extract_repo_name_from_url(github_url_slash, "github")
        assert result == "owner_repo"
        
        print("✓ GitHub URL tests passed")
    
    def test_extract_repo_name_gitlab_urls(self):
        """Test repository name extraction from GitLab URLs"""
        
        # Test standard GitLab URL
        gitlab_url = "https://gitlab.com/owner/repo"
        result = self.db_manager._extract_repo_name_from_url(gitlab_url, "gitlab")
        assert result == "owner_repo"
        
        # Test GitLab URL with subgroups
        gitlab_subgroup = "https://gitlab.com/group/subgroup/repo"
        result = self.db_manager._extract_repo_name_from_url(gitlab_subgroup, "gitlab")
        assert result == "subgroup_repo"
        
        print("✓ GitLab URL tests passed")
    
    def test_extract_repo_name_bitbucket_urls(self):
        """Test repository name extraction from Bitbucket URLs"""
        bitbucket_url = "https://bitbucket.org/owner/repo"
        result = self.db_manager._extract_repo_name_from_url(bitbucket_url, "bitbucket")
        assert result == "owner_repo"

        print("✓ Bitbucket URL tests passed")
    
    def test_extract_repo_name_local_paths(self):
        """Test repository name extraction from local paths"""
        result = self.db_manager._extract_repo_name_from_url("/home/user/projects/my-repo", "local")
        assert result == "my-repo"

        result = self.db_manager._extract_repo_name_from_url("/var/repos/project.git", "local")
        assert result == "project"

        print("✓ Local path tests passed")

    def test_extract_repo_name_current_implementation_bug(self):
        """Test that demonstrates the current implementation bug"""
        # The current implementation references 'type' which is not in scope
        try:
            # This should raise a NameError due to undefined 'type' variable
            result = self.db_manager._extract_repo_name_from_url("https://github.com/owner/repo")
            print("⚠️  WARNING: Expected the current implementation to fail due to undefined 'type' variable")
            print(f"    But got result: {result}")
        except (NameError, TypeError) as e:
            print(f"✓ Current implementation correctly fails with: {type(e).__name__}: {e}")
        except Exception as e:
            print(f"⚠️  Unexpected error: {type(e).__name__}: {e}")
        
        # Test absolute local path
        local_path = "/home/user/projects/my-repo"
        result = self.db_manager._extract_repo_name_from_url(local_path, "local")
        assert result == "my-repo"
        
        # Test local path with .git suffix
        local_git = "/var/repos/project.git"
        result = self.db_manager._extract_repo_name_from_url(local_git, "local")
        assert result == "project"
        
        print("✓ Local path tests passed")
    
    def test_extract_repo_name_edge_cases(self):
        """Test edge cases for repository name extraction"""
        
        # Test URL with insufficient parts (should use fallback)
        short_url = "https://github.com/repo"
        result = self.db_manager._extract_repo_name_from_url(short_url, "github")
        assert result == "repo"
        
        # Test single directory name
        single_name = "my-repo"
        result = self.db_manager._extract_repo_name_from_url(single_name, "local")
        assert result == "my-repo"
        
        print("✓ Edge case tests passed")


================================================
FILE: tests/README.md
================================================
# DeepWiki Tests

This directory contains all tests for the DeepWiki project, organized by type and scope.

## Directory Structure

```
tests/
├── unit/                 # Unit tests - test individual components in isolation
│   ├── test_google_embedder.py          # Tests for Google AI embedder client
│   └── test_google_embedder_fix.py      # Tests for embedding response parsing fix
├── integration/          # Integration tests - test component interactions
│   └── test_full_integration.py         # Full pipeline integration test
├── api/                  # API tests - test HTTP endpoints
│   └── test_api.py                      # API endpoint tests
└── run_tests.py         # Test runner script
```

## Running Tests

### All Tests
```bash
python tests/run_tests.py
```

### Unit Tests Only
```bash
python tests/run_tests.py --unit
```

### Integration Tests Only
```bash
python tests/run_tests.py --integration
```

### API Tests Only
```bash
python tests/run_tests.py --api
```

### Individual Test Files
```bash
# Unit tests
python tests/unit/test_google_embedder.py
python tests/unit/test_google_embedder_fix.py

# Integration tests
python tests/integration/test_full_integration.py

# API tests
python tests/api/test_api.py
```

## Test Requirements

### Environment Variables
- `GOOGLE_API_KEY`: Required for Google AI embedder tests
- `OPENAI_API_KEY`: Required for some integration tests
- `DEEPWIKI_EMBEDDER_TYPE`: Set to 'google' for Google embedder tests

### Dependencies
All test dependencies are included in the main project requirements:
- `python-dotenv`: For loading environment variables
- `adalflow`: Core framework for embeddings
- `google-generativeai`: Google AI API client
- `requests`: For API testing

## Test Categories

### Unit Tests
- **Purpose**: Test individual components in isolation
- **Speed**: Fast (< 1 second per test)
- **Dependencies**: Minimal external dependencies
- **Examples**: Testing embedder response parsing, configuration loading

### Integration Tests  
- **Purpose**: Test how components work together
- **Speed**: Medium (1-10 seconds per test)
- **Dependencies**: May require API keys and external services
- **Examples**: End-to-end embedding pipeline, RAG workflow

### API Tests
- **Purpose**: Test HTTP endpoints and WebSocket connections
- **Speed**: Medium-slow (5-30 seconds per test)
- **Dependencies**: Requires running API server
- **Examples**: Chat completion endpoints, streaming responses

## Adding New Tests

1. **Choose the right category**: Determine if your test is unit, integration, or API
2. **Create the test file**: Place it in the appropriate subdirectory
3. **Follow naming convention**: `test_<component_name>.py`
4. **Add proper imports**: Use the project root path setup pattern
5. **Document the test**: Add docstrings explaining what the test does
6. **Update this README**: Add your test to the appropriate section

## Troubleshooting

### Import Errors
If you get import errors, ensure the test file includes the project root path setup:

```python
from pathlib import Path
import sys

# Add the project root to the Python path
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))
```

### API Key Issues
Make sure you have a `.env` file in the project root with the required API keys:

```
GOOGLE_API_KEY=your_google_api_key_here
OPENAI_API_KEY=your_openai_api_key_here
DEEPWIKI_EMBEDDER_TYPE=google
```

### Server Dependencies
For API tests, ensure the FastAPI server is running and reachable at `http://localhost:8000` (the address `tests/api/test_api.py` posts to):

```bash
cd api
python main.py
```

================================================
FILE: tests/__init__.py
================================================
# Tests for DeepWiki

================================================
FILE: tests/api/__init__.py
================================================
# API tests

================================================
FILE: tests/api/test_api.py
================================================
import requests
import json
import sys

def test_streaming_endpoint(repo_url, query, file_path=None):
    """
    Test the streaming endpoint with a given repository URL and query.
    
    Args:
        repo_url (str): The GitHub repository URL
        query (str): The query to send
        file_path (str, optional): Path to a file in the repository
    """
    # Define the API endpoint
    url = "http://localhost:8000/chat/completions/stream"
    
    # Define the request payload
    payload = {
        "repo_url": repo_url,
        "messages": [
            {
                "role": "user",
                "content": query
            }
        ],
        "filePath": file_path
    }
    
    print(f"Testing streaming endpoint with:")
    print(f"  Repository: {repo_url}")
    print(f"  Query: {query}")
    if file_path:
        print(f"  File Path: {file_path}")
    print("\nResponse:")
    
    try:
        # Make the request with streaming enabled
        response = requests.post(url, json=payload, stream=True)
        
        # Check if the request was successful
        if response.status_code != 200:
            print(f"Error: {response.status_code}")
            try:
                error_data = json.loads(response.content)
                print(f"Error details: {error_data.get('detail', 'Unknown error')}")
            except:
                print(f"Error content: {response.content}")
            return
        
        # Process the streaming response
        for chunk in response.iter_content(chunk_size=None):
            if chunk:
                print(chunk.decode('utf-8'), end='', flush=True)
        
        print("\n\nStreaming completed successfully.")
    
    except Exception as e:
        print(f"Error: {str(e)}")

if __name__ == "__main__":
    # Positional CLI arguments: <repo_url> <query> [file_path].
    # The first two are mandatory; bail out with a usage hint otherwise.
    if len(sys.argv) < 3:
        print("Usage: python test_api.py <repo_url> <query> [file_path]")
        sys.exit(1)

    repo_url, query = sys.argv[1], sys.argv[2]
    # The optional third argument narrows the request to one file.
    file_path = None
    if len(sys.argv) > 3:
        file_path = sys.argv[3]

    test_streaming_endpoint(repo_url, query, file_path)


================================================
FILE: tests/integration/__init__.py
================================================
# Integration tests

================================================
FILE: tests/integration/test_full_integration.py
================================================
#!/usr/bin/env python3
"""Full integration test for Google AI embeddings."""

import os
import sys
import json
from pathlib import Path

# Add the project root to the Python path.
# Three .parent hops climb from this file (tests/integration/<file>) to the
# repository root, which is put first on sys.path so the `api` imports used by
# the tests below resolve when this file is run directly as a script.
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))

def test_config_loading():
    """Check that the Google embedder is registered in the loaded configuration.

    Returns:
        bool: True when ``configs`` has an ``embedder_google`` entry and
        ``CLIENT_CLASSES`` has ``GoogleEmbedderClient``. False when either is
        missing, or when importing/inspecting the configuration raises.
    """
    print("🔧 Testing configuration loading...")

    try:
        from api.config import configs, CLIENT_CLASSES

        # Guard: the embedder section itself must exist.
        if 'embedder_google' not in configs:
            print("❌ embedder_google configuration not found")
            return False

        print("✅ embedder_google configuration found")
        google_config = configs['embedder_google']
        # default=str keeps non-JSON-serialisable values printable.
        print(f"📋 Google config: {json.dumps(google_config, indent=2, default=str)}")

        # Guard: the client class named by the config must be registered too.
        if 'GoogleEmbedderClient' not in CLIENT_CLASSES:
            print("❌ GoogleEmbedderClient not found in CLIENT_CLASSES")
            return False

        print("✅ GoogleEmbedderClient found in CLIENT_CLASSES")
        return True

    except Exception as e:
        print(f"❌ Error loading configuration: {e}")
        import traceback
        traceback.print_exc()
        return False

def test_embedder_selection():
    """Report the configured embedder type and build a Google embedder explicitly.

    Returns:
        bool: True when the type helpers run and
        ``get_embedder(embedder_type='google')`` returns without raising;
        False when any import or call raises.
    """
    print("\n🔧 Testing embedder selection...")

    try:
        from api.tools.embedder import get_embedder
        from api.config import get_embedder_type, is_google_embedder

        # Report what the configuration currently resolves to.
        print(f"📋 Current embedder type: {get_embedder_type()}")
        print(f"📋 Is Google embedder: {is_google_embedder()}")

        # Request the Google embedder explicitly, regardless of the default.
        print("🧪 Testing get_embedder with embedder_type='google'...")
        google_embedder = get_embedder(embedder_type='google')
        print(f"✅ Google embedder created: {type(google_embedder)}")

    except Exception as e:
        print(f"❌ Error testing embedder selection: {e}")
        import traceback
        traceback.print_exc()
        return False

    return True

def test_google_embedder_with_env():
    """Select the Google embedder through ``DEEPWIKI_EMBEDDER_TYPE`` and build it.

    The variable is set to ``google``, ``api.config`` is reloaded so it
    re-reads the environment, and an embedder is created with no explicit type.
    Afterwards the variable is restored AND ``api.config`` is reloaded again, so
    the module-level state does not stay on ``google`` for later code in the
    same process.

    Returns:
        bool: True when the embedder is created, False when anything raises.
    """
    print("\n🔧 Testing with DEEPWIKI_EMBEDDER_TYPE=google...")

    import importlib

    # Set environment variable
    original_value = os.environ.get('DEEPWIKI_EMBEDDER_TYPE')
    os.environ['DEEPWIKI_EMBEDDER_TYPE'] = 'google'

    # Stays None if the import below fails, so cleanup knows to skip the reload.
    config_module = None

    try:
        # Reload config module to pick up new env var
        import api.config as config_module
        importlib.reload(config_module)

        from api.config import EMBEDDER_TYPE, get_embedder_type, get_embedder_config
        from api.tools.embedder import get_embedder

        print(f"📋 EMBEDDER_TYPE: {EMBEDDER_TYPE}")
        print(f"📋 get_embedder_type(): {get_embedder_type()}")

        # Test getting embedder config
        config = get_embedder_config()
        print(f"📋 Current embedder config client: {config.get('client_class', 'Unknown')}")

        # Test creating embedder
        embedder = get_embedder()
        print(f"✅ Embedder created with google env var: {type(embedder)}")

        return True

    except Exception as e:
        print(f"❌ Error testing with environment variable: {e}")
        import traceback
        traceback.print_exc()
        return False

    finally:
        # Restore original environment variable
        if original_value is not None:
            os.environ['DEEPWIKI_EMBEDDER_TYPE'] = original_value
        else:
            os.environ.pop('DEEPWIKI_EMBEDDER_TYPE', None)

        # Re-read the restored environment; without this the module keeps the
        # 'google' value it picked up during the test.
        if config_module is not None:
            try:
                importlib.reload(config_module)
            except Exception as e:
                # Cleanup is best-effort and must not replace the test result.
                print(f"⚠️  Could not reload api.config after restoring environment: {e}")

def main():
    """Run every integration check in order and print a pass/fail tally.

    A check counts as passed only when it returns a truthy value; a falsy
    return or a raised exception counts as a failure.

    Returns:
        bool: True when all checks passed, False otherwise.
    """
    print("🚀 Starting Google AI Embeddings Integration Tests")
    print("=" * 60)

    checks = (
        test_config_loading,
        test_embedder_selection,
        test_google_embedder_with_env,
    )
    total = len(checks)
    passed = 0

    for check in checks:
        try:
            if check():
                passed += 1
                print("✅ PASSED")
            else:
                print("❌ FAILED")
        except Exception as e:
            print(f"❌ FAILED with exception: {e}")
        # Visual separator between individual checks.
        print("-" * 40)

    print(f"\n📊 Test Results: {passed}/{total} tests passed")

    all_passed = passed == total
    if not all_passed:
        print("💥 Some tests failed!")
        return False

    print("🎉 All integration tests passed!")
    return True

if __name__ == "__main__":
    # Map the boolean outcome onto a process exit status (0 means success).
    success = main()
    exit_status = 0 if success else 1
    sys.exit(exit_status)

================================================
FILE: tests/run_tests.py
================================================
#!/usr/bin/env python3
"""
Test runner for DeepWiki project.

This script provides a unified way to run all tests or specific test categories.
"""

import os
import sys
import argparse
import subprocess
from pathlib import Path

# Add the project root to the Python path.
# Two .parent hops climb from this file (tests/<file>) to the repository root.
# The same path is also used below as the working directory for child test
# processes and as the place to look for a .env file.
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))

def run_test_file(test_file):
    """Execute one test script in a child interpreter and report how it went.

    Args:
        test_file: Path-like object for the script; its ``name`` attribute is
            used in the status messages.

    Returns:
        bool: True when the child exits with status 0; False on a non-zero
        status or when launching/reporting raises.
    """
    print(f"\n🧪 Running {test_file}...")
    try:
        # Same interpreter as the runner, executed from the project root.
        completed = subprocess.run(
            [sys.executable, str(test_file)],
            capture_output=True,
            text=True,
            cwd=project_root,
        )
        succeeded = completed.returncode == 0

        if succeeded:
            print(f"✅ {test_file.name} - PASSED")
        else:
            print(f"❌ {test_file.name} - FAILED")
            # stderr is only surfaced for failing scripts.
            if completed.stderr:
                print(f"💥 Error:\n{completed.stderr}")

        # Captured stdout is shown in both outcomes, after the status lines.
        if completed.stdout:
            print(f"📄 Output:\n{completed.stdout}")
        return succeeded
    except Exception as e:
        print(f"💥 {test_file.name} - ERROR: {e}")
        return False

def run_tests(test_dirs):
    """Run every ``test_*.py`` script found in the given category directories.

    Args:
        test_dirs: Directory names, resolved relative to this file's folder.
            Missing or empty directories are reported and skipped.

    Returns:
        bool: True when no executed script failed, False otherwise.
    """
    tests_root = Path(__file__).parent
    executed = 0
    failed_tests = []

    for test_dir in test_dirs:
        directory = tests_root / test_dir
        if not directory.exists():
            print(f"⚠️  Warning: Test directory {test_dir} not found")
            continue

        # Sorted so scripts run in a stable order.
        scripts = sorted(directory.glob("test_*.py"))
        if not scripts:
            print(f"⚠️  No test files found in {test_dir}")
            continue

        print(f"\n📁 Running {test_dir} tests...")
        for script in scripts:
            executed += 1
            if not run_test_file(script):
                failed_tests.append(str(script))

    # Every executed script either passed or was recorded as failed.
    passed = executed - len(failed_tests)
    banner = f"{'='*50}"

    # Print summary
    print(f"\n{banner}")
    print(f"📊 TEST SUMMARY")
    print(banner)
    print(f"Total tests: {executed}")
    print(f"Passed: {passed}")
    print(f"Failed: {len(failed_tests)}")

    if not failed_tests:
        print(f"\n🎉 All tests passed!")
        return True

    print(f"\n❌ Failed tests:")
    for failed in failed_tests:
        print(f"  - {failed}")
    print(f"\n💡 Tip: Run individual failed tests for more details")
    return False

def check_environment():
    """Print a report on the .env file, API keys, and Python packages tests need.

    Purely informational: nothing is returned and a missing item is reported
    rather than raised. That includes python-dotenv itself, whose absence used
    to abort the whole check with ImportError whenever a .env file was present.
    """
    import importlib

    print("🔧 Checking test environment...")

    # Check for .env file
    env_file = project_root / ".env"
    if env_file.exists():
        print("✅ .env file found")
        try:
            from dotenv import load_dotenv
        except ImportError:
            # Keep going so the remaining checks still run.
            print("❌ python-dotenv not available - install with: pip install python-dotenv")
        else:
            load_dotenv(env_file)
    else:
        print("⚠️  No .env file found - some tests may fail without API keys")

    # Check for API keys
    api_keys = {
        "GOOGLE_API_KEY": "Google AI embedder tests",
        "OPENAI_API_KEY": "OpenAI integration tests"
    }

    for key, purpose in api_keys.items():
        if os.getenv(key):
            print(f"✅ {key} is set ({purpose})")
        else:
            print(f"⚠️  {key} not set - {purpose} may fail")

    # Check Python dependencies: (importable module name, pip package name).
    dependencies = (
        ("adalflow", "adalflow"),
        ("google.generativeai", "google-generativeai"),
        ("requests", "requests"),
    )
    for module_name, package_name in dependencies:
        try:
            importlib.import_module(module_name)
        except ImportError:
            print(f"❌ {package_name} not available - install with: pip install {package_name}")
        else:
            print(f"✅ {package_name} available")

def main():
    """Parse CLI flags, report on the environment, then run the chosen categories.

    With ``--check-env`` the function returns after the environment report.
    Otherwise it exits the process with status 0 when all tests passed and 1
    when any failed. With no category flag, all categories run.
    """
    parser = argparse.ArgumentParser(description="Run DeepWiki tests")
    parser.add_argument("--unit", action="store_true", help="Run only unit tests")
    parser.add_argument("--integration", action="store_true", help="Run only integration tests")
    parser.add_argument("--api", action="store_true", help="Run only API tests")
    parser.add_argument("--check-env", action="store_true", help="Only check environment setup")
    parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output")

    args = parser.parse_args()

    # The environment report always runs first.
    check_environment()

    if args.check_env:
        return

    # Category names double as the argparse attribute names; the tuple order
    # fixes the run order no matter how the flags were given.
    categories = ("unit", "integration", "api")
    test_dirs = [category for category in categories if getattr(args, category)]

    # If no specific category selected, run all
    if not test_dirs:
        test_dirs = ["unit", "integration", "api"]

    print(f"\n🚀 Starting test run for: {', '.join(test_dirs)}")

    exit_status = 0 if run_tests(test_dirs) else 1
    sys.exit(exit_status)

if __name__ == "__main__":
    main()

================================================
FILE: tests/unit/__init__.py
================================================
# Unit tests

================================================
FILE: tests/unit/test_all_embedders.py
================================================
#!/usr/bin/env python3
"""
Comprehensive test suite for all embedder types (OpenAI, Google, Ollama).
This test file validates the embedder system before any modifications are made.
"""

import os
import sys
import logging
from pathlib import Path
from unittest.mock import patch, MagicMock

# Add the project root to the Python path.
# Three .parent hops climb from this file (tests/unit/<file>) to the repository
# root, so the `api` imports inside the test methods resolve when this file is
# run directly.
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))

# Set up environment: load variables via python-dotenv before any test reads
# API keys with os.getenv. No path is passed, so python-dotenv decides which
# .env file (if any) is used.
from dotenv import load_dotenv
load_dotenv()

# Configure logging: INFO and above, with timestamp, logger name and level.
# The module-level `logger` is what the test classes below report through.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Simple test framework without pytest
class TestRunner:
    def __init__(self):
        self.tests_run = 0
        self.tests_passed = 0
        self.tests_failed = 0
        self.failures = []
    
    def run_test(self, test_func, test_name=None):
        """Run a single test function."""
        if test_name is None:
            test_name = test_func.__name__
        
        self.tests_run += 1
        try:
            logger.info(f"Running test: {test_name}")
            test_func()
            self.tests_passed += 1
            logger.info(f"✅ {test_name} PASSED")
            return True
        except Exception as e:
            self.tests_failed += 1
            self.failures.append((test_name, str(e)))
            logger.error(f"❌ {test_name} FAILED: {e}")
            return False
    
    def run_test_class(self, test_class):
        """Run all test methods in a test class."""
        instance = test_class()
        test_methods = [getattr(instance, method) for method in dir(instance) 
                       if method.startswith('test_') and callable(getattr(instance, method))]
        
        for test_method in test_methods:
            test_name = f"{test_class.__name__}.{test_method.__name__}"
            self.run_test(test_method, test_name)
    
    def run_parametrized_test(self, test_func, parameters, test_name_base=None):
        """Run a test function with multiple parameter sets."""
        if test_name_base is None:
            test_name_base = test_func.__name__
        
        for i, param in enumerate(parameters):
            test_name = f"{test_name_base}[{param}]"
            self.run_test(lambda: test_func(param), test_name)
    
    def summary(self):
        """Print test summary."""
        logger.info(f"\n📊 Test Summary:")
        logger.info(f"Tests run: {self.tests_run}")
        logger.info(f"Passed: {self.tests_passed}")
        logger.info(f"Failed: {self.tests_failed}")
        
        if self.failures:
            logger.error("\n❌ Failed tests:")
            for test_name, error in self.failures:
                logger.error(f"  - {test_name}: {error}")
        
        return self.tests_failed == 0

class TestEmbedderConfiguration:
    """Test embedder configuration system."""
    
    def test_config_loading(self):
        """Test that all embedder configurations load properly."""
        from api.config import configs, CLIENT_CLASSES
        
        # Check all embedder configurations exist
        assert 'embedder' in configs, "OpenAI embedder config missing"
        assert 'embedder_google' in configs, "Google embedder config missing"
        assert 'embedder_ollama' in configs, "Ollama embedder config missing"
        assert 'embedder_bedrock' in configs, "Bedrock embedder config missing"
        
        # Check client classes are available
        assert 'OpenAIClient' in CLIENT_CLASSES, "OpenAIClient missing from CLIENT_CLASSES"
        assert 'GoogleEmbedderClient' in CLIENT_CLASSES, "GoogleEmbedderClient missing from CLIENT_CLASSES"
        assert 'OllamaClient' in CLIENT_CLASSES, "OllamaClient missing from CLIENT_CLASSES"
        assert 'BedrockClient' in CLIENT_CLASSES, "BedrockClient missing from CLIENT_CLASSES"
    
    def test_embedder_type_detection(self):
        """Test embedder type detection functions."""
        from api.config import get_embedder_type, is_ollama_embedder, is_google_embedder, is_bedrock_embedder
        
        # Default type should be detected
        current_type = get_embedder_type()
        assert current_type in ['openai', 'google', 'ollama', 'bedrock'], f"Invalid embedder type: {current_type}"
        
        # Boolean functions should work
        is_ollama = is_ollama_embedder()
        is_google = is_google_embedder()
        is_bedrock = is_bedrock_embedder()
        assert isinstance(is_ollama, bool), "is_ollama_embedder should return boolean"
        assert isinstance(is_google, bool), "is_google_embedder should return boolean"
        assert isinstance(is_bedrock, bool), "is_bedrock_embedder should return boolean"
        
        # Only one should be true at a time (unless using openai default)
        if current_type == 'bedrock':
            assert is_bedrock and not is_ollama and not is_google
        elif current_type == 'ollama':
            assert is_ollama and not is_google and not is_bedrock
        elif current_type == 'google':
            assert is_google and not is_ollama and not is_bedrock
        else:  # openai
            assert not is_ollama and not is_google and not is_bedrock

    def test_get_embedder_config(self, embedder_type=None):
        """Test getting embedder config for each type."""
        from api.config import get_embedder_config
        
        if embedder_type:
            # Mock the EMBEDDER_TYPE for testing
            with patch('api.config.EMBEDDER_TYPE', embedder_type):
                config = get_embedder_config()
                assert isinstance(config, dict), f"Config for {embedder_type} should be dict"
                assert 'model_client' in config or 'client_class' in config, f"No client specified for {embedder_type}"
        else:
            # Test current configuration
            config = get_embedder_config()
            assert isinstance(config, dict), "Config should be dict"
            assert 'model_client' in config or 'client_class' in config, "No client specified"


class TestEmbedderFactory:
    """Test the embedder factory function."""
    
    def test_get_embedder_with_explicit_type(self):
        """Test get_embedder with explicit embedder_type parameter."""
        from api.tools.embedder import get_embedder
        
        # Test Google embedder
        google_embedder = get_embedder(embedder_type='google')
        assert google_embedder is not None, "Google embedder should be created"

        # Test Bedrock embedder (mock boto3 to avoid hitting AWS credential providers)
        with patch("api.bedrock_client.boto3.Session") as mock_session_cls:
            mock_session = MagicMock()
            mock_session.client.return_value = MagicMock()
            mock_session_cls.return_value = mock_session
            bedrock_embedder = get_embedder(embedder_type='bedrock')
            assert bedrock_embedder is not None, "Bedrock embedder should be created"
        
        # Test OpenAI embedder
        openai_embedder = get_embedder(embedder_type='openai')
        assert openai_embedder is not None, "OpenAI embedder should be created"
        
        # Test Ollama embedder (may fail if Ollama not available, but should not crash)
        try:
            ollama_embedder = get_embedder(embedder_type='ollama')
            assert ollama_embedder is not None, "Ollama embedder should be created"
        except Exception as e:
            logger.warning(f"Ollama embedder creation failed (expected if Ollama not available): {e}")

    def test_get_embedder_with_legacy_params(self):
        """Test get_embedder with legacy boolean parameters."""
        from api.tools.embedder import get_embedder
        
        # Test with use_google_embedder=True
        google_embedder = get_embedder(use_google_embedder=True)
        assert google_embedder is not None, "Google embedder should be created with use_google_embedder=True"
        
        # Test with is_local_ollama=True
        try:
            ollama_embedder = get_embedder(is_local_ollama=True)
            assert ollama_embedder is not None, "Ollama embedder should be created with is_local_ollama=True"
        except Exception as e:
            logger.warning(f"Ollama embedder creation failed (expected if Ollama not available): {e}")

    def test_get_embedder_auto_detection(self):
        """Test get_embedder with automatic type detection."""
        from api.tools.embedder import get_embedder
        
        # Test auto-detection (should use current configuration)
        embedder = get_embedder()
        assert embedder is not None, "Auto-detected embedder should be created"


class TestEmbedderClients:
    """Test individual embedder clients."""

    def test_google_embedder_client(self):
        """Test Google embedder client directly."""
        if not os.getenv('GOOGLE_API_KEY'):
            logger.warning("Skipping Google embedder test - GOOGLE_API_KEY not available")
            return
            
        from api.google_embedder_client import GoogleEmbedderClient
        from adalflow.core.types import ModelType
        
        client = GoogleEmbedderClient()
        
        # Test single embedding
        api_kwargs = client.convert_inputs_to_api_kwargs(
            input="Hello world",
            model_kwargs={"model": "text-embedding-004", "task_type": "SEMANTIC_SIMILARITY"},
            model_type=ModelType.EMBEDDER
        )
        
        response = client.call(api_kwargs, ModelType.EMBEDDER)
        assert response is not None, "Google embedder should return response"
        
        # Parse the response
        parsed = client.parse_embedding_response(response)
        assert parsed.data is not None, "Parsed response should have data"
        assert len(parsed.data) > 0, "Should have at least one embedding"
        assert parsed.error is None, "Should not have errors"

    def test_openai_embedder_via_adalflow(self):
        """Test OpenAI embedder through AdalFlow."""
        if not os.getenv('OPENAI_API_KEY'):
            logger.warning("Skipping OpenAI embedder test - OPENAI_API_KEY not available")
            return
            
        import adalflow as adal
        from api.openai_client import OpenAIClient
        
        client = OpenAIClient()
        embedder = adal.Embedder(
            model_client=client,
            model_kwargs={"model": "text-embedding-3-small", "dimensions": 256}
        )
        
        result = embedder("Hello world")
        assert result is not None, "OpenAI embedder should return result"
        assert hasattr(result, 'data'), "Result should have data attribute"
        assert len(result.data) > 0, "Should have at least one embedding"


class TestDataPipelineFunctions:
    """Test data pipeline functions that use embedders."""
    
    def test_count_tokens(self, embedder_type=None):
        """Test token counting with different embedder types."""
        from api.data_pipeline import count_tokens
        
        test_text = "This is a test string for token counting."
        
        if embedder_type is not None:
            # Test with specific is_ollama_embedder value
            token_count = count_tokens(test_text, is_ollama_embedder=embedder_type)
            assert isinstance(token_count, int), "Token count should be an integer"
            assert token_count > 0, "Token count should be positive"
        else:
            # Test with all values
            for is_ollama in [None, True, False]:
                token_count = count_tokens(test_text, is_ollama_embedder=is_ollama)
                assert isinstance(token_count, int), "Token count should be an integer"
                assert token_count > 0, "Token count should be positive"

    def test_prepare_data_pipeline(self, is_ollama=None):
        """Test data pipeline preparation with different embedder types."""
        from api.data_pipeline import prepare_data_pipeline
        
        if is_ollama is not None:
            try:
                pipeline = prepare_data_pipeline(is_ollama_embedder=is_ollama)
                assert pipeline is not None, "Data pipeline should be created"
                assert hasattr(pipeline, '__call__'), "Pipeline should be callable"
            except Exception as e:
                # Some configurations might fail if services aren't available
                logger.warning(f"Pipeline creation failed (might be expected): {e}")
        else:
            # Test with all values
            for is_ollama_val in [None, True, False]:
                try:
                    pipeline = prepare_data_pipeline(is_ollama_embedder=is_ollama_val)
                    assert pipeline is not None, "Data pipeline should be created"
                    assert hasattr(pipeline, '__call__'), "Pipeline should be callable"
                except Exception as e:
                    logger.warning(f"Pipeline creation failed for is_ollama={is_ollama_val}: {e}")


class TestRAGIntegration:
    """Test RAG class integration with different embedders."""
    
    def test_rag_initialization(self):
        """Test RAG initialization with different embedder configurations."""
        from api.rag import RAG
        
        # Test with default configuration
        try:
            rag = RAG(provider="google", model="gemini-1.5-flash")
            assert rag is not None, "RAG should be initialized"
            assert hasattr(rag, 'embedder'), "RAG should have embedder"
            assert hasattr(rag, 'is_ollama_embedder'), "RAG should have is_ollama_embedder attribute"
        except Exception as e:
            logger.warning(f"RAG initialization failed (might be expected if keys missing): {e}")

    def test_rag_embedder_type_detection(self):
        """Test that RAG correctly detects embedder type."""
        from api.rag import RAG
        
        try:
            rag = RAG()
            # Should have the embedder type detection logic
            assert hasattr(rag, 'is_ollama_embedder'), "RAG should detect embedder type"
            assert isinstance(rag.is_ollama_embedder, bool), "is_ollama_embedder should be boolean"
        except Exception as e:
            logger.warning(f"RAG initialization failed: {e}")


class TestEnvironmentVariableHandling:
    """Test embedder selection via environment variables."""
    
    def test_embedder_type_env_var(self, embedder_type=None):
        """Test embedder selection via DEEPWIKI_EMBEDDER_TYPE environment variable."""
        import importlib
        import api.config
        
        if embedder_type:
            # Test specific embedder type
            self._test_single_embedder_type(embedder_type)
        else:
            # Test all embedder types
            for et in ['openai', 'google', 'ollama', 'bedrock']:
                self._test_single_embedder_type(et)
    
    def _test_single_embedder_type(self, embedder_type):
        """Test a single embedder type."""
        import importlib
        import api.config
        
        # Save original value
        original_value = os.environ.get('DEEPWIKI_EMBEDDER_TYPE')
        
        try:
            # Set environment variable
            os.environ['DEEPWIKI_EMBEDDER_TYPE'] = embedder_type
            
            # Reload config to pick up new env var
            importlib.reload(api.config)
            
            from api.config import EMBEDDER_TYPE, get_embedder_type
            
            assert EMBEDDER_TYPE == embedder_type, f"EMBEDDER_TYPE should be {embedder_type}"
            assert get_embedder_type() == embedder_type, f"get_embedder_type() should return {embedder_type}"
            
        finally:
            # Restore original value
            if original_value is not None:
                os.environ['DEEPWIKI_EMBEDDER_TYPE'] = original_value
            elif 'DEEPWIKI_EMBEDDER_TYPE' in os.environ:
                del os.environ['DEEPWIKI_EMBEDDER_TYPE']
            
            # Reload config to restore original state
            importlib.reload(api.config)


class TestIssuesIdentified:
    """Test the specific issues identified in the codebase."""
    
    def test_binary_assumptions_in_rag(self):
        """Test that RAG doesn't make binary assumptions about embedders."""
        from api.rag import RAG
        
        # The current implementation only considers is_ollama_embedder
        # This test documents the current behavior and will help verify fixes
        try:
            rag = RAG()
            
            # Current implementation only has is_ollama_embedder
            assert hasattr(rag, 'is_ollama_embedder'), "RAG should have is_ollama_embedder"
            
            # This is the issue: no explicit support for Google embedder detection
            # The fix should add proper embedder type detection
            
        except Exception as e:
            logger.warning(f"RAG test failed: {e}")

    def test_binary_assumptions_in_data_pipeline(self):
        """Test binary assumptions in data pipeline functions."""
        from api.data_pipeline import prepare_data_pipeline, count_tokens
        
        # These functions currently only consider is_ollama_embedder parameter
        # This test documents the issue and will verify fixes
        
        # count_tokens only considers ollama vs non-ollama
        token_count_ollama = count_tokens("test", is_ollama_embedder=True)
        token_count_other = count_tokens("test", is_ollama_embedder=False)
        
        assert isinstance(token_count_ollama, int)
        assert isinstance(token_count_other, int)
        
        # prepare_data_pipeline only accepts is_ollama_embedder parameter
        try:
            pipeline_ollama = prepare_data_pipeline(is_ollama_embedder=True)
            pipeline_other = prepare_data_pipeline(is_ollama_embedder=False)
            
            assert pipeline_ollama is not None
            assert pipeline_other is not None
        except Exception as e:
            logger.warning(f"Pipeline creation failed: {e}")


def run_all_tests():
    """Run every embedder test class, then the hand-expanded parametrized cases.

    Returns:
        Whatever ``runner.summary()`` returns; the ``__main__`` guard below
        maps a truthy value to exit code 0.
    """
    logger.info("Running comprehensive embedder tests...")

    runner = TestRunner()

    def run_cases(bound_test, label, values):
        # Each value is bound as a lambda default so every case keeps its own argument.
        for value in values:
            runner.run_test(
                lambda v=value: bound_test(v),
                f"{label}[{value}]"
            )

    # Whole test classes first
    for test_class in (
        TestEmbedderConfiguration,
        TestEmbedderFactory,
        TestEmbedderClients,
        TestDataPipelineFunctions,
        TestRAGIntegration,
        TestEnvironmentVariableHandling,
        TestIssuesIdentified,
    ):
        logger.info(f"\n🧪 Running {test_class.__name__}...")
        runner.run_test_class(test_class)

    # Parametrized tests are expanded by hand, one runner entry per value
    logger.info("\n🧪 Running parametrized tests...")

    embedder_types = ['openai', 'google', 'ollama', 'bedrock']
    flag_values = [None, True, False]

    # Embedder config lookup per embedder type
    config_test = TestEmbedderConfiguration()
    run_cases(
        config_test.test_get_embedder_config,
        "TestEmbedderConfiguration.test_get_embedder_config",
        embedder_types,
    )

    # Token counting and pipeline preparation per flag value
    pipeline_test = TestDataPipelineFunctions()
    run_cases(
        pipeline_test.test_count_tokens,
        "TestDataPipelineFunctions.test_count_tokens",
        flag_values,
    )
    run_cases(
        pipeline_test.test_prepare_data_pipeline,
        "TestDataPipelineFunctions.test_prepare_data_pipeline",
        flag_values,
    )

    # Environment variable handling per embedder type
    env_test = TestEnvironmentVariableHandling()
    run_cases(
        env_test.test_embedder_type_env_var,
        "TestEnvironmentVariableHandling.test_embedder_type_env_var",
        embedder_types,
    )

    return runner.summary()


if __name__ == "__main__":
    # Exit status 0 when the run reports success, 1 otherwise.
    success = run_all_tests()
    raise SystemExit(0 if success else 1)


================================================
FILE: tests/unit/test_google_embedder.py
================================================
#!/usr/bin/env python3
"""
Test script to reproduce and fix Google embedder 'list' object has no attribute 'embedding' error.
"""

import os
import sys
import logging
from pathlib import Path

# Make the repository root importable so `api.*` resolves when this file is
# run directly. The file lives at tests/unit/, so three `.parent` hops from
# `__file__` reach the project root.
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))

# Set up environment: load_dotenv() comes from python-dotenv and is called
# with no arguments here, before any test function runs.
from dotenv import load_dotenv
load_dotenv()

# Configure root logging at INFO with timestamp, logger name and level, and
# create the module-level logger used by every function below.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

def test_google_embedder_client():
    """Drive GoogleEmbedderClient directly for a single string and for a batch.

    For each input the raw response type/keys and the parsed data length and
    error are logged.

    Returns:
        True when every step ran without raising; False when any exception
        occurred (it is logged and its traceback printed).
    """
    logger.info("Testing Google embedder client...")

    # (announcement, raw-response label, parsed-response label, input payload)
    scenarios = (
        ("Testing single embedding...", "Single embedding", "Parsed response",
         "Hello world"),
        ("Testing batch embedding...", "Batch embedding", "Parsed batch response",
         ["Hello world", "Test embedding"]),
    )

    try:
        from api.google_embedder_client import GoogleEmbedderClient
        from adalflow.core.types import ModelType

        client = GoogleEmbedderClient()

        for announcement, raw_label, parsed_label, payload in scenarios:
            logger.info(announcement)
            api_kwargs = client.convert_inputs_to_api_kwargs(
                input=payload,
                model_kwargs={"model": "text-embedding-004", "task_type": "SEMANTIC_SIMILARITY"},
                model_type=ModelType.EMBEDDER
            )

            response = client.call(api_kwargs, ModelType.EMBEDDER)
            logger.info(f"{raw_label} response type: {type(response)}")
            if isinstance(response, dict):
                key_summary = list(response.keys())
            else:
                key_summary = 'Not a dict'
            logger.info(f"{raw_label} response keys: {key_summary}")

            # Run the raw response through the client's own parser
            parsed = client.parse_embedding_response(response)
            data_length = len(parsed.data) if parsed.data else 0
            logger.info(f"{parsed_label} data length: {data_length}")
            logger.info(f"{parsed_label} error: {parsed.error}")

        return True

    except Exception as e:
        logger.error(f"Error testing Google embedder client: {e}")
        import traceback
        traceback.print_exc()
        return False

def test_adalflow_embedder():
    """Wrap GoogleEmbedderClient in an ``adal.Embedder`` and embed one string.

    Returns:
        True when the embedder call completed without raising; False when any
        exception occurred (it is logged and its traceback printed).
    """
    logger.info("Testing AdalFlow embedder with Google client...")

    try:
        import adalflow as adal
        from api.google_embedder_client import GoogleEmbedderClient

        # Build the embedder around a fresh Google client
        google_client = GoogleEmbedderClient()
        embedder_kwargs = {
            "model": "text-embedding-004",
            "task_type": "SEMANTIC_SIMILARITY"
        }
        embedder = adal.Embedder(model_client=google_client, model_kwargs=embedder_kwargs)

        # Embed a single input and log what comes back
        logger.info("Testing embedder with single input...")
        result = embedder("Hello world")
        logger.info(f"Embedder result type: {type(result)}")
        logger.info(f"Embedder result: {result}")

        if hasattr(result, 'data'):
            data_length = len(result.data) if result.data else 0
            logger.info(f"Result data length: {data_length}")

        return True

    except Exception as e:
        logger.error(f"Error testing AdalFlow embedder: {e}")
        import traceback
        traceback.print_exc()
        return False

def test_document_processing():
    """Push two sample documents through ``ToEmbeddings`` with the Google embedder.

    Returns:
        The transformed documents on success, or False when any step raises
        (the error is logged and its traceback printed). Callers therefore
        need an explicit ``is False`` check to detect failure.
    """
    logger.info("Testing document processing with Google embedder...")

    try:
        from adalflow.core.types import Document
        from adalflow.components.data_process import ToEmbeddings
        from api.tools.embedder import get_embedder

        # Sample inputs as (text, file path) pairs
        samples = (
            ("This is a test document.", "test1.txt"),
            ("Another test document here.", "test2.txt"),
        )
        docs = [
            Document(text=text, meta_data={"file_path": file_path})
            for text, file_path in samples
        ]

        # Resolve the Google embedder through the project factory
        embedder = get_embedder(embedder_type='google')
        logger.info(f"Embedder type: {type(embedder)}")

        embedder_transformer = ToEmbeddings(embedder=embedder, batch_size=100)

        logger.info("Transforming documents...")
        transformed_docs = embedder_transformer(docs)

        logger.info(f"Transformed docs type: {type(transformed_docs)}")
        logger.info(f"Number of transformed docs: {len(transformed_docs)}")

        # Log the shape of each transformed document
        for index, item in enumerate(transformed_docs):
            logger.info(f"Doc {index} type: {type(item)}")
            logger.info(f"Doc {index} attributes: {dir(item)}")
            if not hasattr(item, 'vector'):
                logger.info(f"Doc {index} has no vector attribute")
                continue
            logger.info(f"Doc {index} vector type: {type(item.vector)}")
            logger.info(f"Doc {index} vector length: {len(item.vector) if item.vector else 0}")

        return transformed_docs

    except Exception as e:
        logger.error(f"Error testing document processing: {e}")
        import traceback
        traceback.print_exc()
        return False

def main():
    """Run the three Google embedder checks in order, stopping at the first failure.

    Returns:
        True when all three checks succeed, False as soon as one fails.
    """
    logger.info("Starting Google embedder tests...")

    # Tests 1 and 2 (direct client, AdalFlow embedder) report a plain boolean.
    boolean_stages = (
        (test_google_embedder_client, "Google embedder client test failed"),
        (test_adalflow_embedder, "AdalFlow embedder test failed"),
    )
    for stage, failure_message in boolean_stages:
        if stage():
            continue
        logger.error(failure_message)
        return False

    # Test 3 returns the transformed documents on success, so only an
    # explicit False counts as failure.
    if test_document_processing() is False:
        logger.error("Document processing test failed")
        return False

    logger.info("All tests completed successfully!")
    return True

if __name__ == "__main__":
    # Exit status 0 when main() reports success, 1 otherwise.
    success = main()
    raise SystemExit(0 if success else 1)

================================================
FILE: tsconfig.json
================================================
{
  "compilerOptions": {
    "target": "ES2017",
    "lib": ["dom", "dom.iterable", "esnext"],
    "allowJs": true,
    "skipLibCheck": true,
    "strict": true,
    "noEmit": true,
    "esModuleInterop": true,
    "module": "esnext",
    "moduleResolution": "bundler",
    "resolveJsonModule": true,
    "isolatedModules": true,
    "jsx": "preserve",
    "incremental": true,
    "plugins": [
      {
        "name": "next"
      }
    ],
    "paths": {
      "@/*": ["./src/*"]
    }
  },
  "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
  "exclude": ["node_modules"]
}