Repository: https-deeplearning-ai/agentic-ai-public
Branch: main
Commit: 0ee5559e7d54
Files: 10
Total size: 60.3 KB
Directory structure:
gitextract_gb1zi4pq/
├── .gitignore
├── Dockerfile
├── README.md
├── docker/
│ └── entrypoint.sh
├── main.py
├── requirements.txt
├── src/
│ ├── agents.py
│ ├── planning_agent.py
│ └── research_tools.py
└── templates/
└── index.html
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[codz]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py.cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
#poetry.toml
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
#pdm.lock
#pdm.toml
.pdm-python
.pdm-build/
# pixi
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
#pixi.lock
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
# in the .venv directory. It is recommended not to include this directory in version control.
.pixi
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.envrc
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# Abstra
# Abstra is an AI-powered process automation framework.
# Ignore directories containing user credentials, local state, and settings.
# Learn more at https://abstra.io/docs
.abstra/
# Visual Studio Code
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# you could uncomment the following to ignore the entire vscode folder
# .vscode/
# Ruff stuff:
.ruff_cache/
# PyPI configuration file
.pypirc
# Cursor
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
# refer to https://docs.cursor.com/context/ignore-files
.cursorignore
.cursorindexingignore
# Marimo
marimo/_static/
marimo/_lsp/
__marimo__/
================================================
FILE: Dockerfile
================================================
FROM python:3.11-slim

# PYTHONDONTWRITEBYTECODE: do not write .pyc files into the image/container.
# PYTHONUNBUFFERED: flush stdout/stderr immediately so logs show up in `docker logs`.
# PIP_NO_CACHE_DIR: keep pip's download cache out of the image layers.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1

# Postgres (Debian default) + build deps.
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        gcc \
        libpq-dev \
        postgresql \
        postgresql-client \
        postgresql-contrib \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install Python dependencies before copying the sources so this layer stays
# cached until requirements.txt itself changes.
COPY requirements.txt /app/requirements.txt
RUN pip install -r requirements.txt

COPY . /app

# Entrypoint script: starts Postgres, prepares the role/database, then launches Uvicorn.
# It uses `su` to act as the postgres user, so the container keeps running as root.
COPY docker/entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

# 8000 = FastAPI (Uvicorn), 5432 = Postgres
EXPOSE 8000 5432

CMD ["/entrypoint.sh"]
================================================
FILE: README.md
================================================
# Reflective Research Agent (FastAPI + Postgres, single container)
A FastAPI web app that plans a research workflow, runs tool-using agents (Tavily, arXiv, Wikipedia), and stores task state/results in Postgres.
This repo includes a Docker setup that runs **Postgres + the API in one container** (for local/dev).
## Features
* `/` serves a simple UI (Jinja2 template) to kick off a research task.
* `/generate_report` kicks off a threaded, multi-step agent workflow (planner → research/writer/editor).
* `/task_progress/{task_id}` live status for each step/substep.
* `/task_status/{task_id}` final status + report.
---
## Project layout (key paths)
```
.
├─ main.py # FastAPI app (routes, Task model, background agent workflow)
├─ src/
│ ├─ planning_agent.py # planner_agent(), executor_agent_step()
│ ├─ agents.py # research_agent, writer_agent, editor_agent (example)
│ └─ research_tools.py # tavily_search_tool, arxiv_search_tool, wikipedia_search_tool
├─ templates/
│ └─ index.html # UI page rendered by "/"
├─ static/ # optional static assets (css/js)
├─ docker/
│ └─ entrypoint.sh # starts Postgres, prepares DB, then launches Uvicorn
├─ requirements.txt
├─ Dockerfile
└─ README.md
```
> Make sure `templates/index.html` and (optionally) `static/` exist and are copied into the image.
---
## Prerequisites
* **Docker** (Desktop on Windows/macOS, or engine on Linux).
* API keys stored in a `.env` file:
```
OPENAI_API_KEY=your-openai-api-key
TAVILY_API_KEY=your-tavily-api-key
```
* Python deps are installed by Docker from `requirements.txt`:
* `fastapi`, `uvicorn`, `sqlalchemy`, `python-dotenv`, `jinja2`, `requests`, `wikipedia`, etc.
* Plus any libs used by your `aisuite` client.
---
## Environment variables
The app **reads only `DATABASE_URL`** at startup.
* The container’s entrypoint sets a sane default for local dev:
```
postgresql://app:local@127.0.0.1:5432/appdb
```
* To use Tavily:
* Provide `TAVILY_API_KEY` (via `.env` or `-e`).
Optional (if you want to override defaults done by the entrypoint):
* `POSTGRES_USER` (default `app`)
* `POSTGRES_PASSWORD` (default `local`)
* `POSTGRES_DB` (default `appdb`)
---
## Build & Run (local/dev)
### 1) Build
```bash
docker build -t fastapi-postgres-service .
```
### 2) Run (foreground)
```bash
docker run --rm -it -p 8000:8000 -p 5432:5432 --name fpsvc --env-file .env fastapi-postgres-service
```
You should see logs like:
```
🚀 Starting Postgres cluster 17/main...
✅ Postgres is ready
CREATE ROLE
CREATE DATABASE
🔗 DATABASE_URL=postgresql://app:local@127.0.0.1:5432/appdb
INFO: Uvicorn running on http://0.0.0.0:8000
```
### 3) Open the app
* UI: [http://localhost:8000/](http://localhost:8000/)
* Docs: [http://localhost:8000/docs](http://localhost:8000/docs)
---
## API quickstart
### Kick off a run
```bash
curl -X POST http://localhost:8000/generate_report \
-H "Content-Type: application/json" \
-d '{"prompt": "Large Language Models for scientific discovery", "model":"openai:gpt-4o"}'
# -> {"task_id": "UUID..."}
```
### Poll progress
```bash
curl http://localhost:8000/task_progress/<TASK_ID>
```
### Final status + report
```bash
curl http://localhost:8000/task_status/<TASK_ID>
```
---
## Troubleshooting
**I open [http://localhost:8000](http://localhost:8000) and see nothing / errors**
* Confirm `templates/index.html` exists inside the container:
```bash
docker exec -it fpsvc bash -lc "ls -l /app/templates && ls -l /app/static || true"
```
* Watch logs while you load the page:
```bash
docker logs -f fpsvc
```
**Container asks for a Postgres password on startup**
* The entrypoint uses **UNIX socket + peer auth** for admin tasks (no password).
Ensure you’re not calling `psql -h 127.0.0.1 -U postgres` in the script—use:
```bash
su -s /bin/bash postgres -c "psql -c '...'"
```
**`DATABASE_URL not set` error**
* The entrypoint exports a default DSN. If you overrode it, ensure it’s valid:
```
postgresql://<user>:<password>@<host>:<port>/<database>
```
**Tables disappear on restart**
* In your `main.py` you call `Base.metadata.drop_all(...)` on startup.
Comment it out or guard with an env flag:
```python
if os.getenv("RESET_DB_ON_STARTUP") == "1":
Base.metadata.drop_all(bind=engine)
```
**Tavily / arXiv / Wikipedia errors**
* Make sure the container has network access, and provide your API keys in a `.env` file in the project root (passed to the container with `--env-file .env`):
```
# OpenAI API Key
OPENAI_API_KEY=your-openai-api-key
TAVILY_API_KEY=your-tavily-api-key
```
* Wikipedia rate limits sometimes; try later or handle exceptions gracefully.
---
## Development tips
* **Hot reload** (optional): For dev, you can run Uvicorn with `--reload` if you mount your code:
```bash
docker run --rm -it -p 8000:8000 -p 5432:5432 \
-v "$PWD":/app \
--name fpsvc fastapi-postgres-service \
bash -lc "pg_ctlcluster \$(psql -V | awk '{print \$3}' | cut -d. -f1) main start && uvicorn main:app --host 0.0.0.0 --port 8000 --reload"
```
* **Connect to DB from host:**
```bash
psql "postgresql://app:local@localhost:5432/appdb"
```
---
================================================
FILE: docker/entrypoint.sh
================================================
#!/usr/bin/env bash
# Container entrypoint: starts the Debian-packaged Postgres cluster, makes sure
# the application role and database exist, exports DATABASE_URL and finally
# replaces itself with Uvicorn.
set -euo pipefail

# --- Start Debian's default Postgres cluster ---
# The major version is parsed from `psql -V`; it is the cluster name prefix
# expected by pg_ctlcluster.
PG_MAJOR="$(psql -V | awk '{print $3}' | cut -d. -f1)"
echo "🚀 Starting Postgres cluster ${PG_MAJOR}/main..."
pg_ctlcluster "${PG_MAJOR}" main start

# Wait (up to 60 probes, one per second) until the server accepts connections
pg_ready=0
for _ in $(seq 1 60); do
  if pg_isready -h 127.0.0.1 -p 5432 -U postgres >/dev/null 2>&1; then
    echo "✅ Postgres is ready"
    pg_ready=1
    break
  fi
  sleep 1
done

# Fail fast instead of continuing with a database that never came up;
# otherwise the psql calls below fail with a far less obvious error.
if [ "${pg_ready}" -ne 1 ]; then
  echo "❌ Postgres did not become ready in time" >&2
  exit 1
fi

# --- app/DSN variables (overridable from the environment) ---
: "${POSTGRES_USER:=app}"
: "${POSTGRES_PASSWORD:=local}"
: "${POSTGRES_DB:=appdb}"

# Creates role if it does not exist.
# Admin commands run as the postgres OS user through `su`, without -h or a password.
if ! su -s /bin/bash postgres -c "psql -tAc \"SELECT 1 FROM pg_roles WHERE rolname='${POSTGRES_USER}'\"" | grep -q 1; then
  su -s /bin/bash postgres -c "psql -c \"CREATE USER ${POSTGRES_USER} WITH PASSWORD '${POSTGRES_PASSWORD}';\""
fi

# Creates DB if it does not exist
if ! su -s /bin/bash postgres -c "psql -tAc \"SELECT 1 FROM pg_database WHERE datname='${POSTGRES_DB}'\"" | grep -q 1; then
  su -s /bin/bash postgres -c "psql -c \"CREATE DATABASE ${POSTGRES_DB} OWNER ${POSTGRES_USER};\""
fi

# Define DSN for app (an externally supplied DATABASE_URL wins over the default)
export DATABASE_URL="${DATABASE_URL:-postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@127.0.0.1:5432/${POSTGRES_DB}}"
# NOTE(review): this prints the password to the container log — acceptable for
# local/dev only; mask it before using this image anywhere shared.
echo "🔗 DATABASE_URL=${DATABASE_URL}"

# Launch FastAPI (exec so Uvicorn replaces this shell and receives signals directly)
exec uvicorn main:app --host 0.0.0.0 --port 8000
================================================
FILE: main.py
================================================
import os
import uuid
import json
import threading
from datetime import datetime
from typing import Optional, Literal
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from sqlalchemy import create_engine, Column, Text, DateTime, String
from sqlalchemy.orm import sessionmaker, declarative_base
from dotenv import load_dotenv
from src.planning_agent import planner_agent, executor_agent_step
import html, textwrap
# === Load env vars ===
load_dotenv()
DATABASE_URL = os.getenv("DATABASE_URL")

# Validate before touching the value: calling .startswith() on None would raise
# AttributeError and hide the intended "not set" error below.
if not DATABASE_URL:
    raise RuntimeError("DATABASE_URL not set")

# Fix for Heroku's postgres:// URL format
if DATABASE_URL.startswith("postgres://"):
    DATABASE_URL = DATABASE_URL.replace("postgres://", "postgresql://", 1)

# === DB setup ===
Base = declarative_base()
engine = create_engine(DATABASE_URL, echo=False, future=True)
SessionLocal = sessionmaker(bind=engine)
class Task(Base):
    """ORM model for one report-generation task (table ``tasks``)."""

    __tablename__ = "tasks"

    # UUID string generated by the /generate_report endpoint
    id = Column(String, primary_key=True, index=True)
    prompt = Column(Text)
    # Set to "running", "done" or "error" by the code in this module
    status = Column(String)
    created_at = Column(DateTime, default=datetime.utcnow)
    # onupdate keeps the timestamp fresh even when a caller forgets to set it
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
    # JSON-encoded result payload, written when the workflow finishes
    result = Column(Text)
# Recreate the schema on startup. Dropping wipes every stored task, so it can be
# switched off with RESET_DB_ON_STARTUP=0; the default ("1") keeps the original
# behaviour of starting from an empty table.
if os.getenv("RESET_DB_ON_STARTUP", "1") == "1":
    try:
        Base.metadata.drop_all(bind=engine)
    except Exception as e:
        # Best effort: report the failure and carry on with create_all below.
        print(f"\u274c DB drop failed: {e}")
try:
    Base.metadata.create_all(bind=engine)
except Exception as e:
    print(f"\u274c DB creation failed: {e}")
# === FastAPI ===
app = FastAPI()
app.add_middleware(
    CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"]
)

# StaticFiles raises at construction time when the directory does not exist,
# which would stop the whole app from importing. Static assets are optional,
# so only mount them when the folder is actually present.
if os.path.isdir("static"):
    app.mount("/static", StaticFiles(directory="static"), name="static")

templates = Jinja2Templates(directory="templates")

# In-memory progress, keyed by task id: {"steps": [...]}.
# Written by the background workflow thread and read by /task_progress.
task_progress = {}
class PromptRequest(BaseModel):
    """Request body accepted by POST /generate_report."""

    # Research topic / question passed to the planner and executor agents
    prompt: str
@app.get("/", response_class=HTMLResponse)
def read_index(request: Request):
    """Serve the UI page by rendering the ``index.html`` template."""
    context = {"request": request}
    return templates.TemplateResponse("index.html", context)
@app.get("/api", response_class=JSONResponse)
def health_check(request: Request):
    """Liveness probe: always answers with a static ``{"status": "ok"}`` payload."""
    payload = {"status": "ok"}
    return payload
@app.post("/generate_report")
def generate_report(req: PromptRequest):
    """Create a task, plan its steps and start the agent workflow in a thread.

    Args:
        req: Request body carrying the research prompt.

    Returns:
        ``{"task_id": <uuid string>}``; poll ``/task_progress/{task_id}`` and
        ``/task_status/{task_id}`` with it.

    Raises:
        Whatever ``planner_agent`` raises. The task row is marked "error" first
        so it does not stay "running" forever.
    """
    task_id = str(uuid.uuid4())

    # Persist the task; always release the session, even if the commit fails.
    db = SessionLocal()
    try:
        db.add(Task(id=task_id, prompt=req.prompt, status="running"))
        db.commit()
    finally:
        db.close()

    task_progress[task_id] = {"steps": []}

    try:
        initial_plan_steps = planner_agent(req.prompt)
    except Exception:
        # Planning failed before any worker thread exists: record the failure,
        # then let the original exception propagate unchanged.
        db = SessionLocal()
        try:
            task = db.query(Task).filter(Task.id == task_id).first()
            if task is not None:
                task.status = "error"
                task.updated_at = datetime.utcnow()
                db.commit()
        finally:
            db.close()
        raise

    for step_title in initial_plan_steps:
        task_progress[task_id]["steps"].append(
            {
                "title": step_title,
                "status": "pending",
                "description": "Awaiting execution",
                "substeps": [],
            }
        )

    thread = threading.Thread(
        target=run_agent_workflow, args=(task_id, req.prompt, initial_plan_steps)
    )
    thread.start()
    return {"task_id": task_id}
@app.get("/task_progress/{task_id}")
def get_task_progress(task_id: str):
    """Return the in-memory step list for a task, or an empty one if the id is unknown."""
    if task_id in task_progress:
        return task_progress[task_id]
    return {"steps": []}
@app.get("/task_status/{task_id}")
def get_task_status(task_id: str):
    """Return the stored status and decoded result of a task.

    Args:
        task_id: Id returned by ``/generate_report``.

    Returns:
        ``{"status": ..., "result": ...}`` where ``result`` is the JSON-decoded
        payload, or ``None`` while no result has been stored.

    Raises:
        HTTPException: 404 when no task with this id exists.
    """
    db = SessionLocal()
    try:
        task = db.query(Task).filter(Task.id == task_id).first()
        if not task:
            raise HTTPException(status_code=404, detail="Task not found")
        # Read the columns while the session is still open.
        status = task.status
        raw_result = task.result
    finally:
        # Release the connection even when the query or the 404 raises.
        db.close()

    return {
        "status": status,
        "result": json.loads(raw_result) if raw_result else None,
    }
def format_history(history):
    """Render (title, description, output) entries as one text block.

    Entries are separated by a blank line; an empty history yields "".
    """
    blocks = []
    for title, desc, output in history:
        blocks.append(f"🔹 {title}\n{desc}\n\n📝 Output:\n{output}")
    return "\n\n".join(blocks)
def _render_step_card(prompt, previous_step, step_description, output):
    """Build the HTML card shown in the UI for one completed plan step.

    Only the user prompt is HTML-escaped: it is untrusted request input.
    The other values are interpolated verbatim, as before; agent output is
    deliberately not wrapped in <pre>.
    """
    return "\n".join(
        [
            "<div style='border:1px solid #ccc; border-radius:8px; padding:10px; margin:8px 0; background:#fff;'>",
            "<div style='font-weight:bold; color:#2563eb;'>📘 User Prompt</div>",
            f"<div style='white-space:pre-wrap;'>{html.escape(prompt or '')}</div>",
            "<div style='font-weight:bold; color:#16a34a; margin-top:8px;'>📜 Previous Step</div>",
            "<pre style='white-space:pre-wrap; background:#f9fafb; padding:6px; border-radius:6px; margin:0;'>",
            f"{previous_step}",
            "</pre>",
            "<div style='font-weight:bold; color:#f59e0b; margin-top:8px;'>🧹 Your next task</div>",
            f"<div style='white-space:pre-wrap;'>{step_description}</div>",
            "<div style='font-weight:bold; color:#10b981; margin-top:8px;'>✅ Output</div>",
            "<!-- ⚠️ NO <pre> AQUÍ -->",
            "<div style='white-space:pre-wrap;'>",
            f"{output}",
            "</div>",
            "</div>",
        ]
    )


def _store_task_outcome(task_id, status, result=None):
    """Persist the final status (and optional JSON result) of a task row."""
    db = SessionLocal()
    try:
        task = db.query(Task).filter(Task.id == task_id).first()
        if task is None:
            # Nothing to update; report it rather than crash the worker thread.
            print(f"Workflow error for task {task_id}: task row not found")
            return
        task.status = status
        if result is not None:
            task.result = json.dumps(result)
        task.updated_at = datetime.utcnow()
        db.commit()
    finally:
        db.close()


def run_agent_workflow(task_id: str, prompt: str, initial_plan_steps: list):
    """Execute the planned steps one by one and record progress and the result.

    Intended to run in a background thread. Progress goes into
    ``task_progress[task_id]["steps"]``; the final status and result go into
    the ``tasks`` table.

    Args:
        task_id: Id of the task; must already be present in ``task_progress``.
        prompt: Original user prompt, forwarded to every executor step.
        initial_plan_steps: Step titles produced by the planner.
    """
    steps_data = task_progress[task_id]["steps"]
    execution_history = []

    def update_step_status(index, status, description="", substep=None):
        # Out-of-range indexes are ignored.
        if index < len(steps_data):
            steps_data[index]["status"] = status
            if description:
                steps_data[index]["description"] = description
            if substep:
                steps_data[index]["substeps"].append(substep)
            steps_data[index]["updated_at"] = datetime.utcnow().isoformat()

    try:
        for i, plan_step_title in enumerate(initial_plan_steps):
            update_step_status(i, "running", f"Executing: {plan_step_title}")
            actual_step_description, agent_name, output = executor_agent_step(
                plan_step_title, execution_history, prompt
            )
            execution_history.append([plan_step_title, actual_step_description, output])

            # [-2:-1] selects the step before the one just appended (empty on the first step).
            card = _render_step_card(
                prompt,
                format_history(execution_history[-2:-1]),
                actual_step_description,
                output,
            )
            update_step_status(
                i,
                "done",
                f"Completed: {plan_step_title}",
                {"title": f"Called {agent_name}", "content": card},
            )

        # The output of the last executed step is the report.
        final_report_markdown = (
            execution_history[-1][-1] if execution_history else "No report generated."
        )
        result = {"html_report": final_report_markdown, "history": steps_data}
        _store_task_outcome(task_id, "done", result)
    except Exception as e:
        print(f"Workflow error for task {task_id}: {e}")
        if steps_data:
            # Blame the step that was running; fall back to the last step.
            error_step_index = next(
                (i for i, s in enumerate(steps_data) if s["status"] == "running"),
                len(steps_data) - 1,
            )
            if error_step_index >= 0:
                update_step_status(
                    error_step_index,
                    "error",
                    f"Error during execution: {e}",
                    {"title": "Error", "content": str(e)},
                )
        try:
            _store_task_outcome(task_id, "error")
        except Exception as db_error:
            # Report a failed status write instead of letting the thread die silently.
            print(f"Workflow error for task {task_id}: could not store error status: {db_error}")
================================================
FILE: requirements.txt
================================================
fastapi
uvicorn[standard]
sqlalchemy
psycopg2-binary
python-dotenv
jinja2
openai
tavily-python
wikipedia
requests
aisuite
docstring_parser
# PDF text extraction
pdfminer.six
pymupdf # optional but recommended for faster/better PDF text extraction
================================================
FILE: src/agents.py
================================================
from datetime import datetime
from urllib import response
from aisuite import Client
from src.research_tools import (
arxiv_search_tool,
tavily_search_tool,
wikipedia_search_tool,
)
client = Client()
# === Research Agent ===
def _collect_tool_calls(resp):
    """Return the de-duplicated (tool name, raw arguments) pairs found on a response.

    Looks at ``resp.intermediate_responses`` and at ``intermediate_messages`` of
    the final message; either attribute may be missing. Order of first
    appearance is preserved.
    """
    calls = []
    # A) From intermediate_responses
    for ir in getattr(resp, "intermediate_responses", []) or []:
        try:
            for tc in ir.choices[0].message.tool_calls or []:
                calls.append((tc.function.name, tc.function.arguments))
        except Exception:
            # Best effort: skip entries that do not have the expected shape.
            continue
    # B) From intermediate_messages on the final message
    for msg in getattr(resp.choices[0].message, "intermediate_messages", []) or []:
        # assistant message with tool_calls
        for tc in getattr(msg, "tool_calls", None) or []:
            calls.append((tc.function.name, tc.function.arguments))
    # Dedup while preserving order
    return list(dict.fromkeys(calls))


def _format_tool_call(name, args):
    """Format one tool call as ``- name(key=value, ...)``.

    JSON-object arguments are expanded to ``key=repr(value)`` pairs; anything
    else is shown as its raw string.
    """
    import json

    arg_text = str(args)
    try:
        parsed = json.loads(args) if isinstance(args, str) else args
        if isinstance(parsed, dict):
            arg_text = ", ".join(f"{k}={repr(v)}" for k, v in parsed.items())
    except Exception:
        # keep raw string if not JSON
        pass
    return f"- {name}({arg_text})"


def research_agent(
    prompt: str, model: str = "openai:gpt-4.1-mini", return_messages: bool = False
):
    """Run the tool-using research agent on a prompt.

    Args:
        prompt: The research request, appended to the agent instructions.
        model: aisuite model identifier.
        return_messages: Currently ignored; a ``(content, messages)`` tuple is
            always returned.

    Returns:
        ``(content, messages)``. ``content`` is the model answer, followed by an
        HTML "Tools used" list when tool calls were recorded. On any exception
        ``content`` is ``"[Model Error: ...]"`` instead.
    """
    import html

    print("==================================")
    print("🔍 Research Agent")
    print("==================================")
    full_prompt = f"""
You are an advanced research assistant with expertise in information retrieval and academic research methodology. Your mission is to gather comprehensive, accurate, and relevant information on any topic requested by the user.
## AVAILABLE RESEARCH TOOLS:
1. **`tavily_search_tool`**: General web search engine
- USE FOR: Recent news, current events, blogs, websites, industry reports, and non-academic sources
- BEST FOR: Up-to-date information, diverse perspectives, practical applications, and general knowledge
2. **`arxiv_search_tool`**: Academic publication database
- USE FOR: Peer-reviewed research papers, technical reports, and scholarly articles
- LIMITED TO THESE DOMAINS ONLY:
* Computer Science
* Mathematics
* Physics
* Statistics
* Quantitative Biology
* Quantitative Finance
* Electrical Engineering and Systems Science
* Economics
- BEST FOR: Scientific evidence, theoretical frameworks, and technical details in supported fields
3. **`wikipedia_search_tool`**: Encyclopedia resource
- USE FOR: Background information, definitions, overviews, historical context
- BEST FOR: Establishing foundational knowledge and understanding basic concepts
## RESEARCH METHODOLOGY:
1. **Analyze Request**: Identify the core research questions and knowledge domains
2. **Plan Search Strategy**: Determine which tools are most appropriate for the topic
3. **Execute Searches**: Use the selected tools with effective keywords and queries
4. **Evaluate Sources**: Prioritize credibility, relevance, recency, and diversity
5. **Synthesize Findings**: Organize information logically with clear source attribution
6. **Document Search Process**: Note which tools were used and why
## TOOL SELECTION GUIDELINES:
- For scientific/academic questions in supported domains → Use `arxiv_search_tool`
- For recent developments, news, or practical information → Use `tavily_search_tool`
- For fundamental concepts or historical context → Use `wikipedia_search_tool`
- For comprehensive research → Use multiple tools strategically
- NEVER use `arxiv_search_tool` for domains outside its supported list
- ALWAYS verify information across multiple sources when possible
## OUTPUT FORMAT:
Present your research findings in a structured format that includes:
1. **Summary of Research Approach**: Tools used and search strategy
2. **Key Findings**: Organized by subtopic or source
3. **Source Details**: Include URLs, titles, authors, and publication dates
4. **Limitations**: Note any gaps in available information
Today is {datetime.now().strftime("%Y-%m-%d")}.
USER RESEARCH REQUEST:
{prompt}
""".strip()
    messages = [{"role": "user", "content": full_prompt}]
    tools = [arxiv_search_tool, tavily_search_tool, wikipedia_search_tool]
    try:
        resp = client.chat.completions.create(
            model=model,
            messages=messages,
            tools=tools,
            tool_choice="auto",
            max_turns=5,
            temperature=0.0,  # Use deterministic output
        )
        content = resp.choices[0].message.content or ""

        # Pretty print every recorded tool call
        tool_lines = [
            _format_tool_call(name, args) for name, args in _collect_tool_calls(resp)
        ]
        if tool_lines:
            tools_html = (
                "<h2 style='font-size:1.5em; color:#2563eb;'>📎 Tools used</h2>"
            )
            # Tool arguments are free text; escape them so they cannot inject
            # markup into the HTML shown in the UI.
            tools_html += (
                "<ul>"
                + "".join(
                    f"<li>{html.escape(line, quote=False)}</li>" for line in tool_lines
                )
                + "</ul>"
            )
            content += "\n\n" + tools_html
        print("✅ Output:\n", content)
        return content, messages
    except Exception as e:
        print("❌ Error:", e)
        return f"[Model Error: {str(e)}]", messages
def writer_agent(
    prompt: str,
    model: str = "openai:gpt-4.1-mini",
    min_words_total: int = 2400,
    min_words_per_section: int = 400,
    max_tokens: int = 15000,
    retries: int = 1,
):
    """Draft an academic report in Markdown from the given research material.

    Args:
        prompt: User message for the writer (research findings and instructions).
        model: aisuite model identifier.
        min_words_total: Accepted but not used by the current implementation.
        min_words_per_section: Accepted but not used by the current implementation.
        max_tokens: Completion token limit passed to the model call.
        retries: Accepted but not used by the current implementation.

    Returns:
        ``(content, messages)``: the generated text ("" when the model returns
        no content) and the message list that was sent.
    """
    print("==================================")
    print("✍️ Writer Agent")
    print("==================================")
    system_message = """
You are an expert academic writer with a PhD-level understanding of scholarly communication. Your task is to synthesize research materials into a comprehensive, well-structured academic report.
## REPORT REQUIREMENTS:
- Produce a COMPLETE, POLISHED, and PUBLICATION-READY academic report in Markdown format
- Create original content that thoroughly analyzes the provided research materials
- DO NOT merely summarize the sources; develop a cohesive narrative with critical analysis
- Length should be appropriate to thoroughly cover the topic (typically 1500-3000 words)
## MANDATORY STRUCTURE:
1. **Title**: Clear, concise, and descriptive of the content
2. **Abstract**: Brief summary (100-150 words) of the report's purpose, methods, and key findings
3. **Introduction**: Present the topic, research question/problem, significance, and outline of the report
4. **Background/Literature Review**: Contextualize the topic within existing scholarship
5. **Methodology**: If applicable, describe research methods, data collection, and analytical approaches
6. **Key Findings/Results**: Present the primary outcomes and evidence
7. **Discussion**: Interpret findings, address implications, limitations, and connections to broader field
8. **Conclusion**: Synthesize main points and suggest directions for future research
9. **References**: Complete list of all cited works
## ACADEMIC WRITING GUIDELINES:
- Maintain formal, precise, and objective language throughout
- Use discipline-appropriate terminology and concepts
- Support all claims with evidence and reasoning
- Develop logical flow between ideas, paragraphs, and sections
- Include relevant examples, case studies, data, or equations to strengthen arguments
- Address potential counterarguments and limitations
## CITATION AND REFERENCE RULES:
- Use numeric inline citations [1], [2], etc. for all borrowed ideas and information
- Every claim based on external sources MUST have a citation
- Each inline citation must correspond to a complete entry in the References section
- Every reference listed must be cited at least once in the text
- Preserve ALL original URLs, DOIs, and bibliographic information from source materials
- Format references consistently according to academic standards
## FORMATTING GUIDELINES:
- Use Markdown syntax for all formatting (headings, emphasis, lists, etc.)
- Include appropriate section headings and subheadings to organize content
- Format any equations, tables, or figures according to academic conventions
- Use bullet points or numbered lists when appropriate for clarity
- Use html syntax to handle all links with target="_blank", so user can always open link in new tab on both html and markdown format
Output the complete report in Markdown format only. Do not include meta-commentary about the writing process.
INTERNAL CHECKLIST (DO NOT INCLUDE IN OUTPUT):
- [ ] Incorporated all provided research materials
- [ ] Developed original analysis beyond mere summarization
- [ ] Included all mandatory sections with appropriate content
- [ ] Used proper inline citations for all borrowed content
- [ ] Created complete References section with all cited sources
- [ ] Maintained academic tone and language throughout
- [ ] Ensured logical flow and coherent structure
- [ ] Preserved all source URLs and bibliographic information
""".strip()
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": prompt},
    ]

    # Single deterministic completion call
    completion = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0,
        max_tokens=max_tokens,
    )
    content = completion.choices[0].message.content or ""
    print("✅ Output:\n", content)
    return content, messages
def editor_agent(
    prompt: str,
    model: str = "openai:gpt-4.1-mini",
    target_min_words: int = 2400,
):
    """Polish an academic draft and return the revised Markdown.

    Args:
        prompt: User message for the editor (the draft and instructions).
        model: aisuite model identifier.
        target_min_words: Accepted but not used by the current implementation.

    Returns:
        ``(content, messages)``: the revised text and the message list that was
        sent. ``content`` is "" when the model returns no content, matching the
        research and writer agents.
    """
    print("==================================")
    print("🧠 Editor Agent")
    print("==================================")
    system_message = """
You are a professional academic editor with expertise in improving scholarly writing across disciplines. Your task is to refine and elevate the quality of the academic text provided.
## Your Editing Process:
1. Analyze the overall structure, argument flow, and coherence of the text
2. Ensure logical progression of ideas with clear topic sentences and transitions between paragraphs
3. Improve clarity, precision, and conciseness of language while maintaining academic tone
4. Verify technical accuracy (to the extent possible based on context)
5. Enhance readability through appropriate formatting and organization
## Specific Elements to Address:
- Strengthen thesis statements and main arguments
- Clarify complex concepts with additional explanations or examples where needed
- Add relevant equations, diagrams, or illustrations (described in markdown) when they would enhance understanding
- Ensure proper integration of evidence and maintain academic rigor
- Standardize terminology and eliminate redundancies
- Improve sentence variety and paragraph structure
- Preserve all citations [1], [2], etc., and maintain the integrity of the References section
## Formatting Guidelines:
- Use markdown formatting consistently for headings, emphasis, lists, etc.
- Structure content with appropriate section headings and subheadings
- Format equations, tables, and figures according to academic standards
Return only the revised, polished text in Markdown format without explanatory comments about your edits.
""".strip()
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": prompt},
    ]
    # Named `resp` so the local does not shadow the module-level `response` import.
    resp = client.chat.completions.create(
        model=model, messages=messages, temperature=0
    )
    # Fall back to "" so callers never receive None.
    content = resp.choices[0].message.content or ""
    print("✅ Output:\n", content)
    return content, messages
================================================
FILE: src/planning_agent.py
================================================
import json
import re
from typing import List
from datetime import datetime
from aisuite import Client
from src.agents import (
research_agent,
writer_agent,
editor_agent,
)
client = Client()
def clean_json_block(raw: str) -> str:
    """Strip a surrounding Markdown code fence (```lang ... ```) from model output.

    Text without a leading fence is only trimmed of surrounding whitespace.
    Any backticks, spaces or newlines left at either end are removed as well.
    """
    text = raw.strip()
    if text.startswith("```"):
        # Opening fence with optional language tag first, then the closing fence.
        for fence_pattern in (r"^```[a-zA-Z]*\n?", r"\n?```$"):
            text = re.sub(fence_pattern, "", text)
    return text.strip("` \n")
from typing import List
import json, ast
def planner_agent(topic: str, model: str = "openai:o4-mini") -> List[str]:
    """Ask the planner model for a research plan and normalize it to a fixed contract.

    The model is prompted to return a Python list of step strings. The reply is
    parsed leniently (JSON, Python literal, with or without a Markdown code
    fence) and then normalized so that the returned plan:

    * starts with the mandatory Tavily step followed by the mandatory arXiv step,
    * ends with the mandatory final Writer-agent report step,
    * contains each mandatory step exactly once, and
    * has at most 7 steps (middle steps are dropped first, never the mandatory ones).

    Args:
        topic: Research topic inserted into the planning prompt.
        model: Model identifier passed to the chat-completions client.

    Returns:
        The normalized list of step strings. If the reply cannot be parsed,
        a built-in default plan is returned.
    """
    max_steps = 7

    prompt = f"""
You are a planning agent responsible for organizing a research workflow using multiple intelligent agents.
🧠 Available agents:
- Research agent: MUST begin with a broad **web search using Tavily** to identify only **relevant** and **authoritative** items (e.g., high-impact venues, seminal works, surveys, or recent comprehensive sources). The output of this step MUST capture for each candidate: title, authors, year, venue/source, URL, and (if available) DOI.
- Research agent: AFTER the Tavily step, perform a **targeted arXiv search** ONLY for the candidates discovered in the web step (match by title/author/DOI). If an arXiv preprint/version exists, record its arXiv URL and version info. Do NOT run a generic arXiv search detached from the Tavily results.
- Writer agent: drafts based on research findings.
- Editor agent: reviews, reflects on, and improves drafts.
🎯 Produce a clear step-by-step research plan **as a valid Python list of strings** (no markdown, no explanations).
Each step must be atomic, actionable, and assigned to one of the agents.
Maximum of 7 steps.
🚫 DO NOT include steps like “create CSV”, “set up repo”, “install packages”.
✅ Focus on meaningful research tasks (search, extract, rank, draft, revise).
✅ The FIRST step MUST be exactly:
"Research agent: Use Tavily to perform a broad web search and collect top relevant items (title, authors, year, venue/source, URL, DOI if available)."
✅ The SECOND step MUST be exactly:
"Research agent: For each collected item, search on arXiv to find matching preprints/versions and record arXiv URLs (if they exist)."
🔚 The FINAL step MUST instruct the writer agent to generate a comprehensive Markdown report that:
- Uses all findings and outputs from previous steps
- Includes inline citations (e.g., [1], (Wikipedia/arXiv))
- Includes a References section with clickable links for all citations
- Preserves earlier sources
- Is detailed and self-contained
Topic: "{topic}"
"""
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=1,
    )
    # The message content may be None; treat that as an empty reply so the
    # default plan is used instead of raising AttributeError.
    raw = (response.choices[0].message.content or "").strip()

    # --- robust parsing: JSON -> ast, on the raw text and on the unfenced text ---
    def _coerce_to_list(s: str) -> List[str]:
        candidates = [s]
        # A fenced reply usually carries a language tag (```python / ```json);
        # stripping only the backticks would leave that tag in front of the list.
        fenced = re.match(r"^```[^\n`]*\n(.*?)\n?```$", s, flags=re.DOTALL)
        if fenced:
            candidates.append(fenced.group(1).strip())
        if s.startswith("```") and s.endswith("```"):
            # Fence without a newline after the opening backticks.
            candidates.append(s.strip("`").strip())

        for candidate in candidates:
            for parse in (json.loads, ast.literal_eval):
                try:
                    obj = parse(candidate)
                except Exception:
                    # Not valid for this parser; try the next parser/candidate.
                    continue
                if isinstance(obj, list) and all(isinstance(x, str) for x in obj):
                    return obj
        return []

    steps = _coerce_to_list(raw)

    # enforce ordering & minimal contract
    required_first = "Research agent: Use Tavily to perform a broad web search and collect top relevant items (title, authors, year, venue/source, URL, DOI if available)."
    required_second = "Research agent: For each collected item, search on arXiv to find matching preprints/versions and record arXiv URLs (if they exist)."
    final_required = "Writer agent: Generate the final comprehensive Markdown report with inline citations and a complete References section with clickable links."

    def _ensure_contract(steps_list: List[str]) -> List[str]:
        steps_list = [s for s in steps_list if isinstance(s, str)]
        if not steps_list:
            return [
                required_first,
                required_second,
                "Research agent: Synthesize and rank findings by relevance, recency, and authority; deduplicate by title/DOI.",
                "Writer agent: Draft a structured outline based on the ranked evidence.",
                "Editor agent: Review for coherence, coverage, and citation completeness; request fixes.",
                final_required,
            ]

        # inject the first step if missing or out of order
        if steps_list[0] != required_first:
            steps_list = [required_first] + steps_list

        if len(steps_list) >= 2 and steps_list[1] == required_second:
            middle = steps_list[2:]
        else:
            # remove any generic arXiv step that is not tied to the Tavily results
            middle = [
                s
                for s in steps_list[1:]
                if "arXiv" not in s or "For each collected item" in s
            ]

        # Mandatory steps are re-added at fixed positions below, so drop any
        # copies that appear elsewhere to avoid duplicates.
        mandated = {required_first, required_second, final_required}
        middle = [s for s in middle if s not in mandated]

        # Cap the middle (not the whole list) so the final step is never truncated.
        room = max_steps - 3
        return [required_first, required_second] + middle[:room] + [final_required]

    steps = _ensure_contract(steps)
    return steps
def executor_agent_step(step_title: str, history: list, prompt: str):
    """
    Executes one plan step by dispatching it to the matching agent.

    The step is routed by the agent named before the first ":" in
    `step_title` (e.g. "Writer agent: ..."). Only when that prefix does not
    identify an agent does routing fall back to keyword matching over the
    whole step text. This keeps a step such as
    "Writer agent: Draft a report from the research findings" with the
    writer instead of sending it to the research agent.

    Args:
        step_title: The plan step to execute.
        history: Iterable of (description, agent_name, output) tuples from
            previously executed steps; included in the prompt as context.
        prompt: The original user prompt.

    Returns:
        - step_title (str)
        - agent_name (str)
        - output (str)

    Raises:
        ValueError: If no agent can be determined for the step.
    """
    # Build the structured, enriched context
    context = f"📘 User Prompt:\n{prompt}\n\n📜 History so far:\n"
    for i, (desc, agent, output) in enumerate(history):
        if "draft" in desc.lower() or agent == "writer_agent":
            context += f"\n✍️ Draft (Step {i + 1}):\n{output.strip()}\n"
        elif "feedback" in desc.lower() or agent == "editor_agent":
            context += f"\n🧠 Feedback (Step {i + 1}):\n{output.strip()}\n"
        elif "research" in desc.lower() or agent == "research_agent":
            context += f"\n🔍 Research (Step {i + 1}):\n{output.strip()}\n"
        else:
            context += f"\n🧩 Other (Step {i + 1}) by {agent}:\n{output.strip()}\n"

    enriched_task = f"""{context}
🧩 Your next task:
{step_title}
"""

    # Select the agent: explicit "<Agent> agent:" prefix first, keywords second
    step_lower = step_title.lower()
    owner, separator, _ = step_lower.partition(":")
    if not separator:
        owner = ""

    if "research" in owner:
        target = "research_agent"
    elif "writ" in owner:
        target = "writer_agent"
    elif "edit" in owner:
        target = "editor_agent"
    elif "research" in step_lower:
        target = "research_agent"
    elif "draft" in step_lower or "write" in step_lower:
        target = "writer_agent"
    elif "revise" in step_lower or "edit" in step_lower or "feedback" in step_lower:
        target = "editor_agent"
    else:
        raise ValueError(f"Unknown step type: {step_title}")

    if target == "research_agent":
        content, _ = research_agent(prompt=enriched_task)
        print("🔍 Research Agent Output:", content)
    elif target == "writer_agent":
        content, _ = writer_agent(prompt=enriched_task)
    else:
        content, _ = editor_agent(prompt=enriched_task)
    return step_title, target, content
================================================
FILE: src/research_tools.py
================================================
from typing import List, Dict
import requests
import xml.etree.ElementTree as ET
from typing import List, Dict, Optional
import os, re, tempfile
import requests
from pdfminer.high_level import extract_text
# Provisional module-level HTTP session with a custom User-Agent header.
# NOTE: `session` is rebound further down in this module to the result of
# _build_session(), so this instance is only the binding until that point.
session = requests.Session()
session.headers.update(
{"User-Agent": "LF-ADP-Agent/1.0 (mailto:your.email@example.com)"}
)
## -----
from typing import List, Dict, Optional
import os, re, time, tempfile
import requests
import xml.etree.ElementTree as ET
# ----- Session with retries & headers -----
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from typing import List, Dict, Optional
import os, re, time
import requests
import xml.etree.ElementTree as ET
from io import BytesIO
from typing import List, Dict, Optional
import os, re, time
import requests
import xml.etree.ElementTree as ET
from io import BytesIO
# ----- Session with retries & headers -----
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
def _build_session(
    user_agent: str = "LF-ADP-Agent/1.0 (mailto:your.email@example.com)",
) -> requests.Session:
    """Create a `requests.Session` with default headers and automatic retries.

    Args:
        user_agent: Value sent in the `User-Agent` header.

    Returns:
        A session whose http:// and https:// traffic goes through one shared
        adapter configured with the retry policy below.
    """
    default_headers = {
        "User-Agent": user_agent,
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate",
        "Connection": "keep-alive",
    }

    # Retry GET/HEAD on connection/read errors and on the listed status codes,
    # without raising from the retry layer itself.
    retry_policy = Retry(
        total=5,
        connect=5,
        read=5,
        backoff_factor=0.6,
        status_forcelist=(429, 500, 502, 503, 504),
        allowed_methods=frozenset(["GET", "HEAD"]),
        raise_on_redirect=False,
        raise_on_status=False,
    )
    shared_adapter = HTTPAdapter(
        max_retries=retry_policy, pool_connections=10, pool_maxsize=20
    )

    http = requests.Session()
    http.headers.update(default_headers)
    for scheme in ("https://", "http://"):
        http.mount(scheme, shared_adapter)
    return http
# Shared module-level session (retries + default headers) used by the HTTP helpers in this module.
session = _build_session()
# ----- Utilities -----
def ensure_pdf_url(abs_or_pdf_url: str) -> str:
    """Turn an abstract-page or PDF URL into an https URL ending in ".pdf".

    Args:
        abs_or_pdf_url: A URL containing "/abs/" or "/pdf/"; surrounding
            whitespace is ignored.

    Returns:
        The URL with "http://" upgraded to "https://", "/abs/" replaced by
        "/pdf/", and a ".pdf" suffix appended when missing.
    """
    normalized = abs_or_pdf_url.strip().replace("http://", "https://")

    # Already a direct PDF link: nothing more to rewrite.
    if normalized.endswith(".pdf") and "/pdf/" in normalized:
        return normalized

    rewritten = normalized.replace("/abs/", "/pdf/")
    return rewritten if rewritten.endswith(".pdf") else rewritten + ".pdf"
def _safe_filename(name: str) -> str:
import re
name = re.sub(r"[^A-Za-z0-9._-]+", "_", name)
if not name.lower().endswith(".pdf"):
name += ".pdf"
return name
def clean_text(s: str) -> str:
    """Normalize text extracted from a PDF.

    Steps, in order:
      1. Convert CRLF / CR line endings to LF.
      2. Join words hyphenated across a line break.
      3. Collapse runs of spaces/tabs into a single space.
      4. Limit consecutive newlines to two (one blank line).

    Line endings are normalized first so that hyphenated breaks written as
    "-\\r\\n" or "-\\r" are joined as well as "-\\n".

    Args:
        s: Raw extracted text.

    Returns:
        The cleaned text with leading/trailing whitespace removed.
    """
    s = re.sub(r"\r\n|\r", "\n", s)  # normalize line endings
    s = re.sub(r"-\n", "", s)  # "transfor-\nmers" -> "transformers"
    s = re.sub(r"[ \t]+", " ", s)  # collapse spaces and tabs
    s = re.sub(r"\n{3,}", "\n\n", s)  # at most one consecutive blank line
    return s.strip()
def fetch_pdf_bytes(pdf_url: str, timeout: int = 90) -> bytes:
    """Download `pdf_url` with the shared session and return the response body.

    Args:
        pdf_url: URL to fetch; redirects are followed.
        timeout: Timeout passed to the request.

    Returns:
        The raw response content.

    Raises:
        Whatever `raise_for_status()` raises for an error status, plus any
        error raised by the request itself.
    """
    response = session.get(pdf_url, allow_redirects=True, timeout=timeout)
    response.raise_for_status()
    payload = response.content
    return payload
def pdf_bytes_to_text(pdf_bytes: bytes, max_pages: Optional[int] = None) -> str:
    """Extract text from in-memory PDF bytes.

    PyMuPDF (`fitz`) is tried first; if it is unavailable or fails for any
    reason, pdfminer.six is used as a fallback. Both paths honour
    `max_pages`.

    Args:
        pdf_bytes: Raw PDF content.
        max_pages: Maximum number of leading pages to read; None means all pages.

    Returns:
        The extracted text.

    Raises:
        RuntimeError: If the pdfminer fallback also fails (including when
            pdfminer is not installed); the original error is chained.
    """
    # 1) PyMuPDF
    try:
        import fitz  # PyMuPDF

        with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
            total = len(doc)
            limit = total if max_pages is None else min(max_pages, total)
            return "\n".join(
                doc.load_page(i).get_text("text") for i in range(limit)
            )
    except Exception:
        # Deliberate best-effort: any PyMuPDF problem falls through to pdfminer.
        pass

    # 2) pdfminer.six
    try:
        from pdfminer.high_level import extract_text_to_fp

        # pdfminer treats maxpages=0 as "no limit", so a non-positive limit
        # must short-circuit to match the PyMuPDF path (which reads no pages).
        if max_pages is not None and max_pages <= 0:
            return ""

        buf_in = BytesIO(pdf_bytes)
        buf_out = BytesIO()
        extract_text_to_fp(buf_in, buf_out, maxpages=max_pages or 0)
        return buf_out.getvalue().decode("utf-8", errors="ignore")
    except Exception as e:
        raise RuntimeError(f"PDF text extraction failed: {e}") from e
def maybe_save_pdf(pdf_bytes: bytes, dest_dir: str, filename: str) -> str:
    """Write `pdf_bytes` into `dest_dir` under a sanitized file name.

    Args:
        pdf_bytes: Content to write.
        dest_dir: Target directory; created if it does not exist.
        filename: Desired file name, passed through `_safe_filename`.

    Returns:
        The path of the written file.
    """
    os.makedirs(dest_dir, exist_ok=True)
    target_path = os.path.join(dest_dir, _safe_filename(filename))
    with open(target_path, "wb") as handle:
        handle.write(pdf_bytes)
    return target_path
# ----- arXiv search -----
from typing import List, Dict
import time, requests, xml.etree.ElementTree as ET
from io import BytesIO
# session = _build_session()
# ensure_pdf_url(), clean_text(), fetch_pdf_bytes(), pdf_bytes_to_text(), maybe_save_pdf()
def arxiv_search_tool(
    query: str,
    max_results: int = 3,
) -> List[Dict]:
    """
    Search arXiv and return one dict per result, with `summary` overwritten
    by text extracted from the paper's PDF when extraction succeeds
    (otherwise the original abstract is kept).

    Each result has the keys title, authors, published, url, summary and
    link_pdf; `pdf_error` / `text_error` are added when the PDF download or
    text extraction fails. On a request or parse failure a single-element
    list with an `error` key is returned instead.
    """
    # ===== INTERNAL FLAGS =====
    _INCLUDE_PDF = True
    _EXTRACT_TEXT = True
    _MAX_PAGES = 6
    _TEXT_CHARS = 5000
    _SAVE_FULL_TEXT = False
    _SLEEP_SECONDS = 1.0
    # ==========================

    encoded_query = requests.utils.quote(query)
    api_url = (
        "https://export.arxiv.org/api/query"
        f"?search_query=all:{encoded_query}&start=0&max_results={max_results}"
    )

    try:
        resp = session.get(api_url, timeout=60)
        resp.raise_for_status()
    except requests.exceptions.RequestException as e:
        return [{"error": f"arXiv API request failed: {e}"}]

    atom = {"atom": "http://www.w3.org/2005/Atom"}

    def _field(node, tag: str) -> str:
        # Text of the first matching Atom child, or "" when absent/empty.
        return node.findtext(f"atom:{tag}", default="", namespaces=atom) or ""

    results: List[Dict] = []
    try:
        feed = ET.fromstring(resp.content)
        for entry in feed.findall("atom:entry", atom):
            url_abs = _field(entry, "id")

            author_names = (
                a.findtext("atom:name", default="", namespaces=atom)
                for a in entry.findall("atom:author", atom)
            )
            authors = [name for name in author_names if name]

            # Prefer the link the feed labels as the PDF; otherwise derive it
            # from the abstract URL.
            link_pdf = next(
                (
                    link.attrib.get("href")
                    for link in entry.findall("atom:link", atom)
                    if link.attrib.get("title") == "pdf"
                ),
                None,
            )
            if not link_pdf and url_abs:
                link_pdf = ensure_pdf_url(url_abs)

            item = {
                "title": _field(entry, "title").strip(),
                "authors": authors,
                "published": _field(entry, "published")[:10],
                "url": url_abs,
                "summary": _field(entry, "summary").strip(),  # original abstract
                "link_pdf": link_pdf,
            }

            pdf_bytes = None
            wants_pdf = _INCLUDE_PDF or _EXTRACT_TEXT
            if wants_pdf and link_pdf:
                try:
                    pdf_bytes = fetch_pdf_bytes(link_pdf, timeout=90)
                    time.sleep(_SLEEP_SECONDS)
                except Exception as e:
                    item["pdf_error"] = f"PDF fetch failed: {e}"

            if _EXTRACT_TEXT and pdf_bytes:
                try:
                    extracted = pdf_bytes_to_text(pdf_bytes, max_pages=_MAX_PAGES)
                    extracted = clean_text(extracted) if extracted else ""
                    if extracted:
                        item["summary"] = (
                            extracted if _SAVE_FULL_TEXT else extracted[:_TEXT_CHARS]
                        )
                except Exception as e:
                    item["text_error"] = f"Text extraction failed: {e}"

            results.append(item)
        return results
    except ET.ParseError as e:
        return [{"error": f"arXiv API XML parse failed: {e}"}]
    except Exception as e:
        return [{"error": f"Unexpected error: {e}"}]
# ---- Tool def ----
# Function-calling schema describing `arxiv_search_tool` to the model:
# `query` is required, `max_results` is an optional integer defaulting to 3.
arxiv_tool_def = {
"type": "function",
"function": {
"name": "arxiv_search_tool",
"description": "Searches arXiv and (internally) fetches PDFs to memory and extracts text.",
"parameters": {
"type": "object",
"properties": {
"query": {"type": "string", "description": "Search keywords."},
"max_results": {"type": "integer", "default": 3},
},
"required": ["query"],
},
},
}
## -----
import os
from dotenv import load_dotenv
from tavily import TavilyClient
load_dotenv() # Loads environment variables from a .env file
def tavily_search_tool(
    query: str, max_results: int = 5, include_images: bool = False
) -> list[dict]:
    """
    Run a web search through the Tavily API.

    Args:
        query (str): The search query.
        max_results (int): Number of results to request (default 5).
        include_images (bool): Whether image results are requested and appended.

    Returns:
        List[dict]: One dict per result with 'title', 'content' and 'url';
        when `include_images` is true, followed by one {'image_url': ...}
        dict per image. If the search fails, a single {'error': ...} dict.

    Raises:
        ValueError: If TAVILY_API_KEY is not set in the environment.
    """
    api_key = os.getenv("TAVILY_API_KEY")
    if not api_key:
        raise ValueError("TAVILY_API_KEY not found in environment variables.")

    base_url = os.getenv("DLAI_TAVILY_BASE_URL")
    client = TavilyClient(api_key, api_base_url=base_url)

    try:
        response = client.search(
            query=query, max_results=max_results, include_images=include_images
        )
        hits = [
            {
                "title": hit.get("title", ""),
                "content": hit.get("content", ""),
                "url": hit.get("url", ""),
            }
            for hit in response.get("results", [])
        ]
        if include_images:
            hits.extend(
                {"image_url": image} for image in response.get("images", [])
            )
        return hits
    except Exception as e:
        return [{"error": str(e)}]  # For LLM-friendly agents
# Function-calling schema describing `tavily_search_tool` to the model:
# `query` is required; `max_results` (default 5) and `include_images`
# (default False) are optional and mirror the function's own defaults.
tavily_tool_def = {
"type": "function",
"function": {
"name": "tavily_search_tool",
"description": "Performs a general-purpose web search using the Tavily API.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "Search keywords for retrieving information from the web.",
},
"max_results": {
"type": "integer",
"description": "Maximum number of results to return.",
"default": 5,
},
"include_images": {
"type": "boolean",
"description": "Whether to include image results.",
"default": False,
},
},
"required": ["query"],
},
},
}
## Wikipedia search tool
from typing import List, Dict
import wikipedia
def wikipedia_search_tool(query: str, sentences: int = 5) -> List[Dict]:
    """
    Searches Wikipedia and returns a summary of the top-ranked article.

    The first search hit is loaded with `auto_suggest=False` so the library
    does not silently swap the title for a "suggested" one, and the summary
    is requested for the resolved page title so that title, summary and URL
    all describe the same article.

    Args:
        query (str): Search query for Wikipedia.
        sentences (int): Number of sentences to include in the summary.

    Returns:
        List[Dict]: A list with a single dictionary containing title,
        summary, and URL; or a single {'error': ...} dictionary when the
        search has no results or the lookup fails.
    """
    try:
        matches = wikipedia.search(query)
        if not matches:
            # Report the real cause instead of an opaque IndexError message.
            return [{"error": f"No Wikipedia results found for query: {query}"}]

        page = wikipedia.page(matches[0], auto_suggest=False)
        summary = wikipedia.summary(
            page.title, sentences=sentences, auto_suggest=False
        )
        return [{"title": page.title, "summary": summary, "url": page.url}]
    except Exception as e:
        return [{"error": str(e)}]
# Tool definition
# Function-calling schema describing `wikipedia_search_tool` to the model:
# `query` is required; `sentences` is optional and defaults to 5.
wikipedia_tool_def = {
"type": "function",
"function": {
"name": "wikipedia_search_tool",
"description": "Searches for a Wikipedia article summary by query string.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "Search keywords for the Wikipedia article.",
},
"sentences": {
"type": "integer",
"description": "Number of sentences in the summary.",
"default": 5,
},
},
"required": ["query"],
},
},
}
# Tool mapping
# Maps each tool name (matching the "name" field of the *_tool_def schemas
# above) to the Python callable that implements it.
tool_mapping = {
"tavily_search_tool": tavily_search_tool,
"arxiv_search_tool": arxiv_search_tool,
"wikipedia_search_tool": wikipedia_search_tool,
}
================================================
FILE: templates/index.html
================================================
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Reflective Research Agents with Tools Use</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
<style>
.step-header { cursor: pointer; }
.step-running { background-color: #fff3cd; }
.step-done { background-color: #d1e7dd; }
.step-error { background-color: #f8d7da; }
.step-pending { background-color: #e2e3e5; }
.substep { font-size: 0.95em; margin-left: 1.5rem; }
.substep-header { cursor: pointer; font-weight: bold; }
.status-icon { margin-right: 0.5rem; }
#stepStatusList div { margin-bottom: 0.25rem; }
#finalReport { background-color: #fff; border: 1px solid #ccc; padding: 1rem; border-radius: 5px; }
</style>
</head>
<body class="p-4">
<div class="container">
<h2 class="mb-4">🧠 Reflective Research Agent — Planning and Tool Use</h2>
<!-- Intro Card -->
<div class="card mb-3 shadow-sm border-0">
<div class="card-body">
<p class="mb-3">
<strong>In this item,</strong> you can try the <em>research agent</em> that Andrew walked through earlier in the module for yourself.
</p>
<p class="mb-3">
To use the agent, enter a prompt in the box below. Research agents work best when you give some details in your prompt to guide their work.
Try adding <strong>3–4 details</strong> that provide more context about your research question.
You don't have to write much—just a few sentences will be enough.
</p>
<p class="mb-3">
To see what the agent is doing in the background, expand the box associated with each step of the workflow by clicking on the
<strong>➕</strong> sign next to the agent call.
This will reveal the behind-the-scenes working of the agent for that step.
</p>
<p class="mb-3">
This agent is powered by <strong>GPT-4.1-mini</strong> and <strong>o4-mini</strong>, two lower-cost
OpenAI models that are not as powerful as the latest OpenAI, Anthropic, or Gemini models.
Keep this in mind as you try your prompts—your mileage may vary depending on your question!
</p>
<hr class="my-4">
<p class="mb-3">
In the graded labs in <strong>Modules 2, 3, and 5</strong>, you’ll gradually build out the back-end of your research agent,
adding new functionality step-by-step as you progress through the course.
</p>
<p class="mb-0">
To make it easier for you to try out, we've included a web-based front end in this activity. The labs, however, will focus on building only the back-end functionality of the research agent.
<br><br>
The generated report includes clickable links in the references section. To access these, right click and open the link in a new tab or window.
<br><br>
You can find all the code for the web-based agent on GitHub:
<a href="https://github.com/https-deeplearning-ai/agentic-ai-public" target="_blank" class="link-primary fw-semibold">
agentic-ai-public
</a>
</p>
</div>
</div>
<!-- Prompt area con textarea de 4 líneas -->
<div class="mb-3">
<label for="promptInput" class="form-label">Prompt</label>
<div class="d-flex align-items-start gap-2">
<textarea
class="form-control"
id="promptInput"
rows="4"
placeholder="Describe your research topic here"
style="flex: 1"
></textarea>
<button class="btn btn-primary" onclick="submitPrompt()">Submit</button>
</div>
<div class="form-text">Tip: Add 3–4 details to make your question more specific.</div>
</div>
<div id="taskInfo" class="mb-3"></div>
<div id="stepStatusList" class="mb-4"></div>
<div id="stepDetails"></div>
<div id="finalOutput" class="mt-4"></div>
</div>
<script>
// Id of the task returned by POST /generate_report; used in the polling URLs and download file names.
let currentTaskId = null;
// The prompt most recently submitted by the user.
let currentTopic = null;
// Handle returned by setInterval for the progress polling loop (null until polling starts).
let pollInterval = null;
// Markdown source of the final report (taken from task.result.html_report).
let finalReportMarkdown = "";
// Step index -> "title-status-substepCount" key of what was last rendered, used to skip unchanged steps.
const renderedSteps = new Map();
/**
 * Shows the "generating steps" banner for the given topic, with a spinner
 * inside the #stepsIcon placeholder.
 *
 * The topic is text typed by the user, so it is HTML-escaped before being
 * interpolated into innerHTML; otherwise markup in the prompt would be
 * parsed and executed by the page.
 *
 * @param {string} topic - The prompt being researched.
 */
function setTaskInfoGenerating(topic){
  const safeTopic = String(topic)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
  document.getElementById('taskInfo').innerHTML = `
<h5 class="d-flex align-items-center">
<span class="me-2">⚙️ Generating steps for the search: <code>${safeTopic}</code></span>
<span id="stepsIcon"><span class='spinner-border spinner-border-sm text-primary'></span></span>
</h5>
`;
}
/**
 * Replaces the content of #stepsIcon with a check mark; does nothing when
 * that element is not in the page.
 */
function setStepsGenerated(){
  const iconHolder = document.getElementById('stepsIcon');
  if (!iconHolder) return;
  iconHolder.textContent = '✅';
}
/**
 * Enables or disables the submit button and the prompt input together.
 *
 * @param {boolean} disabled - Value assigned to each control's `disabled` property.
 */
function disableUI(disabled){
  const controls = [
    document.querySelector('button.btn.btn-primary'),
    document.getElementById('promptInput'),
  ];
  for (const control of controls) {
    // Either control may be missing from the page; skip it in that case.
    if (control) control.disabled = disabled;
  }
}
/**
 * Submits the prompt to POST /generate_report and starts polling for progress.
 *
 * While the request is in flight the form is disabled and the banner shows a
 * spinner. A failed request, a non-2xx response, or a response without a
 * task_id is treated as an error: the spinner is replaced by a cross and the
 * form is re-enabled so the user can retry.
 */
function submitPrompt() {
  const prompt = document.getElementById('promptInput').value;
  if (!prompt) return;
  currentTopic = prompt;
  setTaskInfoGenerating(currentTopic);
  disableUI(true);
  fetch('/generate_report', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ prompt })
  })
    .then(res => {
      // fetch() only rejects on network errors; HTTP error statuses must be checked explicitly.
      if (!res.ok) throw new Error(`Request failed with status ${res.status}`);
      return res.json();
    })
    .then(data => {
      // Without a task id there is nothing to poll and the form would stay disabled.
      if (!data || !data.task_id) throw new Error('Response did not include a task_id');
      currentTaskId = data.task_id;
      document.getElementById('stepStatusList').innerHTML = '';
      document.getElementById('stepDetails').innerHTML = '';
      document.getElementById('finalOutput').innerHTML = '';
      renderedSteps.clear();
      if (pollInterval) clearInterval(pollInterval);
      pollInterval = setInterval(fetchProgress, 2000);
    })
    .catch(() => {
      // #stepsIcon is the spinner rendered by setTaskInfoGenerating(); swap it for an error mark.
      const stepsIcon = document.getElementById('stepsIcon');
      if (stepsIcon) stepsIcon.textContent = '❌';
      const statusIcon = document.getElementById('statusIcon');
      if (statusIcon) statusIcon.textContent = '❌';
      disableUI(false);
    });
}
/**
 * Polls GET /task_progress/<id> and renders one status row and one detail
 * card per step, then checks the overall task status.
 *
 * A step is re-rendered only when its "title-status-substepCount" key
 * changed since the previous poll. Missing `steps` / `substeps` arrays are
 * treated as empty so a partial payload neither throws nor skips the status
 * check. Step titles are escaped before being placed in innerHTML, matching
 * the card-header update path, which already treats them as plain text.
 */
function fetchProgress() {
  if (!currentTaskId) return;

  const escapeHtml = (value) => String(value ?? '')
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');

  fetch(`/task_progress/${currentTaskId}`)
    .then(res => res.json())
    .then(data => {
      const steps = Array.isArray(data?.steps) ? data.steps : [];
      if (steps.length > 0) {
        setStepsGenerated();
      }
      steps.forEach((step, index) => {
        const substeps = Array.isArray(step.substeps) ? step.substeps : [];
        const statusText = String(step.status ?? '').toUpperCase();
        const stepKey = `${step.title}-${step.status}-${substeps.length}`;
        if (renderedSteps.get(index) === stepKey) return;
        renderedSteps.set(index, stepKey);

        const safeTitle = escapeHtml(step.title);
        const icon = step.status === 'done' ? '✅' :
          step.status === 'running' ? '<span class="spinner-border spinner-border-sm text-warning"></span>' :
          step.status === 'error' ? '❌' : '🕓';

        // Status row: create it on first sight, then refresh class and label.
        const rowId = `step-row-${index}`;
        let row = document.getElementById(rowId);
        if (!row) {
          row = document.createElement('div');
          row.id = rowId;
          document.getElementById('stepStatusList').appendChild(row);
        }
        row.className = `step-header ${getStatusClass(step.status)}`;
        row.innerHTML = `${icon} ${safeTitle}`;

        const cardId = `step-card-${index}`;
        let card = document.getElementById(cardId);
        // NOTE(review): the output of marked.parse() is inserted without sanitization,
        // and sub.title / step.description are inserted as HTML — confirm the backend
        // only sends trusted content, or sanitize before rendering.
        const substepsHTML = substeps.map((sub, j) => `
<div class="substep">
<div class="substep-header" data-bs-toggle="collapse" data-bs-target="#substep-${index}-${j}" aria-expanded="false" aria-controls="substep-${index}-${j}">
➕ ${sub.title}
</div>
<div id="substep-${index}-${j}" class="collapse">
<div class="bg-light p-2 rounded" style="white-space:pre-wrap;">${marked.parse(String(sub.content ?? ''))}</div>
</div>
</div>
`).join('');

        if (!card) {
          card = document.createElement('div');
          card.id = cardId;
          card.className = `card mb-3 ${getStepClass(step.status)}`;
          card.innerHTML = `
<div class="card-header">${safeTitle} — ${statusText}</div>
<div class="card-body">
<p>${step.description}</p>
${substepsHTML}
</div>
`;
          document.getElementById('stepDetails').appendChild(card);
        } else {
          card.className = `card mb-3 ${getStepClass(step.status)}`;
          card.querySelector('.card-header').textContent = `${step.title} — ${statusText}`;
          card.querySelector('.card-body').innerHTML = `
<p>${step.description}</p>
${substepsHTML}
`;
        }
      });
      fetchTaskStatus();
    })
    .catch(() => {
      // silent: the next poll tick retries
    });
}
/**
 * Fetches GET /task_status/<id>. When the task is done or failed it stops
 * polling and re-enables the form; when it is done and carries a report, the
 * report is rendered with download buttons.
 *
 * `task.result` may arrive as a JSON string or as an object. The report is
 * read from `result.html_report`, which is treated as Markdown (it is passed
 * to marked.parse and offered as the .md download).
 */
function fetchTaskStatus() {
  fetch(`/task_status/${currentTaskId}`)
    .then(res => res.json())
    .then(task => {
      // NOTE(review): no element with id "statusIcon" is visible in this template,
      // so these updates are no-ops unless one is added elsewhere.
      const statusIcon = document.getElementById('statusIcon');
      if (task.status === 'done') {
        if (statusIcon) statusIcon.textContent = '✅';
        clearInterval(pollInterval);
        disableUI(false);
      } else if (task.status === 'error') {
        if (statusIcon) statusIcon.textContent = '❌';
        clearInterval(pollInterval);
        disableUI(false);
      }
      if (typeof task.result === 'string') {
        // An unparsable result means there is no report to show.
        try { task.result = JSON.parse(task.result); } catch { return; }
      }
      if (task.status === 'done' && task.result?.html_report) {
        finalReportMarkdown = task.result.html_report;
        // NOTE(review): marked.parse() output is inserted without sanitization.
        document.getElementById('finalOutput').innerHTML = `
<h4>📄 Final Report</h4>
<div class="mb-2">
<button class="btn btn-outline-secondary btn-sm me-2" onclick="downloadMarkdown()">⬇️ Download .md</button>
<button class="btn btn-outline-secondary btn-sm" onclick="downloadHTML()">⬇️ Download .html</button>
</div>
<div id="finalReport">${marked.parse(finalReportMarkdown)}</div>
`;
      }
    })
    .catch(() => {
      // silent, like fetchProgress(): a failed status check is retried on the next poll
    });
}
/**
 * Maps a step status to the Bootstrap text-color class used for its status row.
 *
 * @param {string} status - 'done', 'running', 'error', or anything else.
 * @returns {string} The matching class; 'text-muted' for any other status.
 */
function getStatusClass(status) {
  switch (status) {
    case 'done':
      return 'text-success';
    case 'running':
      return 'text-warning';
    case 'error':
      return 'text-danger';
    default:
      return 'text-muted';
  }
}
/**
 * Maps a step status to the background class used for its detail card.
 *
 * @param {string} status - 'done', 'running', 'error', or anything else.
 * @returns {string} The matching class; 'step-pending' for any other status.
 */
function getStepClass(status) {
  switch (status) {
    case 'done':
      return 'step-done';
    case 'running':
      return 'step-running';
    case 'error':
      return 'step-error';
    default:
      return 'step-pending';
  }
}
/**
 * Downloads the final report's Markdown source as final_report_<taskId>.md.
 *
 * The temporary object URL is revoked shortly after the click so the Blob
 * can be released instead of staying alive for the lifetime of the page.
 */
function downloadMarkdown() {
  const blob = new Blob([finalReportMarkdown], { type: 'text/markdown' });
  const objectUrl = URL.createObjectURL(blob);
  const link = document.createElement('a');
  link.href = objectUrl;
  link.download = `final_report_${currentTaskId}.md`;
  link.click();
  // Deferred so the browser has started the download before the URL goes away.
  setTimeout(() => URL.revokeObjectURL(objectUrl), 1000);
}
/**
 * Downloads the final report rendered to HTML as final_report_<taskId>.html.
 *
 * The rendered Markdown is wrapped in a minimal HTML document that declares
 * UTF-8, so non-ASCII characters display correctly when the saved file is
 * opened locally. The temporary object URL is revoked shortly after the
 * click so the Blob can be released.
 */
function downloadHTML() {
  const reportBody = marked.parse(finalReportMarkdown);
  const htmlContent = `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Final Report</title>
</head>
<body>
${reportBody}
</body>
</html>
`;
  const blob = new Blob([htmlContent], { type: 'text/html;charset=utf-8' });
  const objectUrl = URL.createObjectURL(blob);
  const link = document.createElement('a');
  link.href = objectUrl;
  link.download = `final_report_${currentTaskId}.html`;
  link.click();
  // Deferred so the browser has started the download before the URL goes away.
  setTimeout(() => URL.revokeObjectURL(objectUrl), 1000);
}
</script>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/js/bootstrap.bundle.min.js"></script>
</body>
</html>
gitextract_gb1zi4pq/
├── .gitignore
├── Dockerfile
├── README.md
├── docker/
│ └── entrypoint.sh
├── main.py
├── requirements.txt
├── src/
│ ├── agents.py
│ ├── planning_agent.py
│ └── research_tools.py
└── templates/
└── index.html
SYMBOL INDEX (25 symbols across 4 files) FILE: main.py class Task (line 39) | class Task(Base): class PromptRequest (line 70) | class PromptRequest(BaseModel): function read_index (line 75) | def read_index(request: Request): function health_check (line 80) | def health_check(request: Request): function generate_report (line 85) | def generate_report(req: PromptRequest): function get_task_progress (line 112) | def get_task_progress(task_id: str): function get_task_status (line 117) | def get_task_status(task_id: str): function format_history (line 129) | def format_history(history): function run_agent_workflow (line 135) | def run_agent_workflow(task_id: str, prompt: str, initial_plan_steps: li... FILE: src/agents.py function research_agent (line 14) | def research_agent( function writer_agent (line 155) | def writer_agent( function editor_agent (line 249) | def editor_agent( FILE: src/planning_agent.py function clean_json_block (line 15) | def clean_json_block(raw: str) -> str: function planner_agent (line 27) | def planner_agent(topic: str, model: str = "openai:o4-mini") -> List[str]: function executor_agent_step (line 136) | def executor_agent_step(step_title: str, history: list, prompt: str): FILE: src/research_tools.py function _build_session (line 43) | def _build_session( function ensure_pdf_url (line 75) | def ensure_pdf_url(abs_or_pdf_url: str) -> str: function _safe_filename (line 85) | def _safe_filename(name: str) -> str: function clean_text (line 94) | def clean_text(s: str) -> str: function fetch_pdf_bytes (line 102) | def fetch_pdf_bytes(pdf_url: str, timeout: int = 90) -> bytes: function pdf_bytes_to_text (line 108) | def pdf_bytes_to_text(pdf_bytes: bytes, max_pages: Optional[int] = None)... 
function maybe_save_pdf (line 135) | def maybe_save_pdf(pdf_bytes: bytes, dest_dir: str, filename: str) -> str: function arxiv_search_tool (line 152) | def arxiv_search_tool( function tavily_search_tool (line 277) | def tavily_search_tool( function wikipedia_search_tool (line 356) | def wikipedia_search_tool(query: str, sentences: int = 5) -> List[Dict]:
Condensed preview — 10 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (65K chars).
[
{
"path": ".gitignore",
"chars": 4688,
"preview": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[codz]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packag"
},
{
"path": "Dockerfile",
"chars": 580,
"preview": "FROM python:3.11-slim\n\nENV PYTHONDONTWRITEBYTECODE=1 \\\n PYTHONUNBUFFERED=1 \\\n PIP_NO_CACHE_DIR=1\n\n# Postgres (Debi"
},
{
"path": "README.md",
"chars": 5211,
"preview": "# Reflective Research Agent (FastAPI + Postgres, single container)\n\nA FastAPI web app that plans a research workflow, ru"
},
{
"path": "docker/entrypoint.sh",
"chars": 1307,
"preview": "#!/usr/bin/env bash\nset -euo pipefail\n\n# --- Start Debian's default Postgres cluster ---\nPG_MAJOR=\"$(psql -V | awk '{pri"
},
{
"path": "main.py",
"chars": 7100,
"preview": "import os\nimport uuid\nimport json\nimport threading\nfrom datetime import datetime\nfrom typing import Optional, Literal\nfr"
},
{
"path": "requirements.txt",
"chars": 354,
"preview": "fastapi\nuvicorn[standard]\nsqlalchemy\npsycopg2-binary\npython-dotenv\njinja2\nopenai\ntavily-python\nwikipedia\nrequests\naisuit"
},
{
"path": "src/agents.py",
"chars": 12241,
"preview": "from datetime import datetime\nfrom urllib import response\nfrom aisuite import Client\nfrom src.research_tools import (\n "
},
{
"path": "src/planning_agent.py",
"chars": 7409,
"preview": "import json\nimport re\nfrom typing import List\nfrom datetime import datetime\nfrom aisuite import Client\nfrom src.agents i"
},
{
"path": "src/research_tools.py",
"chars": 12114,
"preview": "from typing import List, Dict\nimport requests\nimport xml.etree.ElementTree as ET\nfrom typing import List, Dict, Optional"
},
{
"path": "templates/index.html",
"chars": 10763,
"preview": "<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n <meta charset=\"UTF-8\">\n <title>Reflective Research Agents with Tools Use</tit"
}
]
About this extraction
This page contains the full source code of the https-deeplearning-ai/agentic-ai-public GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 10 files (60.3 KB), approximately 15.1k tokens, and a symbol index with 25 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.