Repository: ronnieops/pi-search-hub
Branch: main
Commit: d6a15539a1c9
Files: 9
Total size: 85.9 KB

Directory structure:
gitextract_flxax2c8/
├── .github/
│   └── workflows/
│       └── publish.yml
├── .gitignore
├── .npmignore
├── README.md
├── backends/
│   ├── parsers.test.ts
│   └── parsers.ts
├── extensions/
│   └── search-hub.ts
├── package.json
└── search.json.example

================================================
FILE CONTENTS
================================================

================================================
FILE: .github/workflows/publish.yml
================================================
# Publishes the package to npm, either manually (workflow_dispatch with an
# explicit version) or automatically when a GitHub release is published.
name: Publish Package

on:
  workflow_dispatch:
    inputs:
      version:
        description: "Version to publish (must match package.json)"
        required: true
        type: string
  release:
    types: [published]

permissions:
  id-token: write
  contents: read

jobs:
  publish:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: '22'
          registry-url: 'https://registry.npmjs.org'
      - run: npm ci
      - name: Verify version
        env:
          # Manual runs supply `inputs.version`; release-triggered runs have no
          # inputs, so fall back to the release tag. Passing the value through
          # the environment (instead of interpolating it into the script text)
          # also prevents shell injection via the input.
          REQUESTED_VERSION: ${{ inputs.version || github.event.release.tag_name }}
        run: |
          PKG_VERSION=$(node -p "require('./package.json').version")
          # Tolerate a conventional leading "v" (tag v1.2.3 vs version 1.2.3).
          REQUESTED_VERSION="${REQUESTED_VERSION#v}"
          if [ "$PKG_VERSION" != "$REQUESTED_VERSION" ]; then
            echo "ERROR: package.json version ($PKG_VERSION) != requested version ($REQUESTED_VERSION)"
            exit 1
          fi
          echo "Publishing pi-search-hub@$PKG_VERSION"
      - run: npm publish

================================================
FILE: .gitignore
================================================
node_modules/
dist/
*.log
.DS_Store

# API keys — NEVER commit these
.pi/
search.json
search.json.local
*.secret.*

# Dev/research docs (local only — never push)
findings.md
researcher*.md
scout*.md
handoffs/
docs/
benchmark/

# Benchmark reports
benchmark/benchmark-current-report.md

# Subagent session artifacts
context.md
research.md

# Other extensions (not search-related)
extensions/lint-markdown.ts
extensions/ralph-loop.ts

# Ralph loop state (local development)
.ralph/

# Personal/social media drafts
github-meta.md
reddit-post.md
================================================ FILE: .npmignore ================================================ # Session artifacts handoffs/ scout*.md researcher*.md context.md research.md # Personal/social media drafts github-meta.md reddit-post.md # Git .git/ .gitignore # All markdown except README *.md !README.md # All TypeScript except the search extension and backends extensions/*.ts !extensions/search-hub.ts # All non-search files .pi/ benchmark/ # Ralph loop state .ralph/ # Docs artifacts docs/ # Tests backends/*.test.ts # Misc *.log .DS_Store ================================================ FILE: README.md ================================================ # pi-search-hub Unified web search + content extraction extension for [pi](https://pi.dev) with **12 backend providers** (all working). One `web_search` tool, one `web_read` tool, auto-fallback, RRF-ranked combine mode, and credential resolution via env/shell/literal. ## Installation ```bash pi install npm:pi-search-hub ``` > **Note for DuckDuckGo backend:** Requires the `ddgs` Python package. Install with: > > - Linux/macOS: `pip3 install ddgs` > - Windows: `pip install ddgs` ## Usage ### Web Search After installing, just ask naturally: ```text Search for recent AI agent frameworks. ``` ```text What's the latest news on Llama 4? 
``` Or use the tools directly — the agent picks the best configured backend automatically: - `web_search` — search the web with auto-fallback or parallel combine mode - `web_read` — fetch any URL as clean markdown ### Combine Mode Set `combine=true` to query **ALL enabled backends in parallel** with Reciprocal Rank Fusion (RRF) ranking: ```text Search for "Rust vs Go performance benchmarks" with combine=true to get results from all backends ``` **Combine mode benefits:** - Broader coverage across multiple search indexes - Results ranked by RRF — position-based scoring across all backends - Each result shows which backend found it - URL deduplication with content-aware merge (prefers richest result) - Useful for comprehensive research or when you want diverse sources **Tradeoff:** Uses more API quota per query (all backends are called), but you get more comprehensive results. ### Read Web Pages Fetch any URL as clean markdown — great for extracting article content, docs, or reference pages. **Note: `web_read` uses [Jina Reader](https://r.jina.ai/) to fetch and convert URLs to markdown.** ```text Read https://docs.example.com/api-reference ``` The `web_read` tool supports: - **objective** — CSS selector to target specific content (e.g. "div.article-body") - **keywords** — relevant terms to highlight on long pages - **mode** — `rush` for speed (return innerText) or `smart` (markdown extraction) - **fresh** — bypass cache when freshness matters ## Supported Backends | # | Backend | Free Tier | API Key? | How to get key | | --- | --------------------- | ----------------------------- | :------: | ----------------------------------------------------------------- | | 1 | **DuckDuckGo** | Unlimited (rate-limited) | **No** | `pip install ddgs` (Linux/macOS: `pip3`) | | 2 | **Jina AI** | Search: key req. 
web_read: free (no key) | Yes | [jina.ai](https://jina.ai) | | 3 | **Marginalia Search** | Unlimited (rate-limited) | **No**† | [marginalia.nu](https://www.marginalia.nu/marginalia-search/api/) | | 4 | **Tavily** | 1,000 calls/month | Yes | [tavily.com](https://tavily.com) | | 5 | **Serper** (Google) | 2,500 free queries (one-time) | Yes | [serper.dev](https://serper.dev) | | 6 | **Brave** | 2,000 queries/month | Yes | [brave.com/search/api](https://brave.com/search/api) | | 7 | **Firecrawl** | 500 free credits | Yes | [firecrawl.dev](https://www.firecrawl.dev) | | 8 | **Exa** | 1,000 free queries/month | Yes | [exa.ai](https://dashboard.exa.ai/api-keys) | | 9 | **LangSearch** | Genuinely free, no CC | Yes | [langsearch.com](https://langsearch.com) | | 10 | **WebSearchAPI.ai** | 2,000 free credits | Yes | [websearchapi.ai](https://www.websearchapi.ai) | | 11 | **Perplexity Sonar** | Paid (usage-based) | Yes | [perplexity.ai](https://docs.perplexity.ai) | | 12 | **SearXNG** | Self-hosted, unlimited | **No** | [docs.searxng.org](https://docs.searxng.org) | > † Marginalia Search uses `public` as a shared API key — no registration required, but subject to a shared rate limit. > > **Jina AI:** Search (`s.jina.ai`) requires a free API key from [jina.ai](https://jina.ai). Content extraction via `web_read` uses Jina Reader (`r.jina.ai`) which is **free and needs no API key**. > > **Perplexity Sonar** supports multiple model variants. Set `model` in your Perplexity backend config to choose: `sonar` (default, fast), `sonar-pro` (higher quality), `sonar-deep-research` (multi-step reasoning), or `sonar-reasoning` (DeepSeek R1-based). > > **SearXNG** is a self-hosted metasearch engine. Run your own instance (or use a public one), no API key required. Configure the instance URL in `.pi/search.json`. > > **Firecrawl** uses `api.firecrawl.dev/v2/search` with a `data.web[]` response shape. The v1 endpoint is deprecated. 
> > **Exa** (March 2026) includes content for the first 10 results per request at no extra cost. Content extraction is enabled by default. ## Configuration Configure backends globally (all projects) or per-project: **Global:** `~/.pi/agent/extensions/search.json` **Project:** `.pi/search.json` (project takes precedence) ```json { "defaultBackend": "auto", "backends": { "duckduckgo": { "enabled": true }, "jina": { "enabled": true, "apiKey": "JINA_API_KEY" }, "marginalia": { "enabled": true }, "serper": { "enabled": true, "apiKey": "SERPER_API_KEY" }, "tavily": { "enabled": true, "apiKey": "TAVILY_API_KEY" }, "brave": { "enabled": true, "apiKey": "BRAVE_API_KEY" }, "exa": { "enabled": true, "apiKey": "EXA_API_KEY" }, "firecrawl": { "enabled": true, "apiKey": "FIRECRAWL_API_KEY" }, "langsearch": { "enabled": true, "apiKey": "LANGSEARCH_API_KEY" }, "websearchapi": { "enabled": true, "apiKey": "WEBSEARCHAPI_API_KEY" }, "perplexity": { "enabled": true, "apiKey": "PERPLEXITY_API_KEY", "model": "sonar" }, "searxng": { "enabled": true, "instanceUrl": "http://localhost:8888" } } } ``` ### Credential Resolution The `apiKey` field supports four formats (following pi-web-providers convention): | `apiKey` value | Resolved from | Example | | ------------------------- | --------------------------------------- | ---------------------------------- | | `"SERPER_API_KEY"` | `process.env.SERPER_API_KEY` | ALL_CAPS → env var | | `"!pass show api/serper"` | stdout of shell command (cached) | `!` prefix → exec | | `"sk-abc123..."` | Used as-is | Literal key (backwards compatible) | | _(unset)_ | `SEARCH_<BACKEND>_API_KEY` env fallback | Auto-enables backend | **Env var references:** Any ALL_CAPS string is treated as an environment variable name (not a literal). If the referenced env var is unset, a warning is printed and the reference resolves to nothing — an ALL_CAPS value is never used as a literal key, but it is also never dropped silently. **Shell commands:** Commands prefixed with `!` are executed via `execSync` with a 5s timeout. 
Results are cached and invalidated when config is reloaded (editing the config file clears the cache). **Convenience env vars:** Backends are auto-enabled when these env vars are set (even with no config entry): ```bash export SEARCH_SERPER_API_KEY="sk-..." export SEARCH_TAVILY_API_KEY="sk-..." export SEARCH_EXA_API_KEY="sk-..." # ... ``` ```json { "backends": { "serper": { "enabled": true, "apiKey": "SERPER_API_KEY" } } } ``` **To rotate a shell-command key:** Update the secret in your password manager, then trigger a config reload (edit the config file, or wait 10s for automatic refresh). Or use the interactive setup: ``` /search-setup ``` ## Commands | Command | Description | | ---------------- | ----------------------------------------------------------------- | | `/search-setup` | Interactive prompt to configure API keys and instance URLs | | `/search-status` | Show which backends are active, which have keys, and their status | > **Tip:** After running `/search-setup` or editing your config, run `/reload` to activate changes without restarting pi. ## How auto mode works ### Fallback Mode (default, `combine=false`) 1. Tries each enabled backend in order from your config 2. If a backend fails (rate limit, auth error, etc.), moves to the next one 3. Jina AI search requires a free API key from [jina.ai](https://jina.ai) (get one at jina.ai/reader). DuckDuckGo requires no API key. Both serve as safety nets 4. Returns results from the first backend that succeeds 5. If all backends fail, reports the collected errors ### Combine Mode (`combine=true`) 1. Queries **ALL** enabled backends in parallel 2. Each backend receives `numResults / numBackends` as a target 3. Results are merged using **Reciprocal Rank Fusion** (RRF) — position-based scoring that works across incompatible ranking systems 4. Each result shows its source backend (e.g., `*Source: Tavily*`) 5. URL dedup prefers the result with the richest content (content > snippet) 6. 
Backend statistics are displayed (which succeeded, result counts, errors) ### RRF Scoring RRF assigns each result a score of `Σ(1 / (60 + rank_i))` across all backends that returned it. Results are ranked by score, then by number of backends that found them. This means a result ranked #1 by one backend and #5 by another beats a result ranked #4 by two backends. ## Security - API keys are stored in local config files only (`~/.pi/agent/extensions/search.json` or `.pi/search.json`), never sent to any third party besides the chosen backend - **Env vars and shell commands** are supported for credential resolution — the config file is trusted (you own it), but never commit plain API keys to version control - DuckDuckGo queries use spawned Python subprocess (abortable via signal) - All HTTP backends have a 30-second timeout; shell commands for credentials have a 5-second timeout - Error messages are sanitized — API response bodies are truncated and key-like patterns are redacted - The `.pi/` directory is in `.gitignore` — **never commit API keys to version control** ## Testing ```bash # Run unit tests for backend parsers npx vitest run backends/parsers.test.ts # Quick test Jina AI (with your free API key) curl -s -H "Authorization: Bearer $JINA_API_KEY" "https://s.jina.ai/?q=test&format=json" | jq . 
# Quick test via curl with your configured key curl -X POST "https://api.exa.ai/search" \ -H "Content-Type: application/json" \ -H "x-api-key: $KEY" \ -d '{"query": "test", "numResults": 3, "contents": {"text": true}}' # Quick test Perplexity Sonar (use "sonar-pro" or "sonar-deep-research" for model) curl -X POST "https://api.perplexity.ai/chat/completions" \ -H "Content-Type: application/json" \ -H "Authorization: Bearer $KEY" \ -d '{"model": "sonar", "messages": [{"role": "user", "content": "test"}], "search_context_size": "low"}' # Quick test Firecrawl (v2 endpoint — code still uses v1) curl -X POST "https://api.firecrawl.dev/v2/search" \ -H "Content-Type: application/json" \ -H "Authorization: Bearer $KEY" \ -d '{"query": "test", "limit": 3}' # Quick test SearXNG (replace URL with your instance) curl "http://localhost:8888/search?q=test&format=json&count=3" ``` ## Adding a new backend Backends are registered via the `BACKEND_DEFS` registry in `extensions/search-hub.ts`. Define a `search` function and add one entry to the registry: ```typescript const BACKEND_DEFS: Record = { // ... existing entries mybackend: { needsKey: true, needsKeyFromConfig: false, needsInstanceUrl: false, label: "My Backend", setupLabel: "My Backend (free tier description)", search: async (query, numResults, { key, signal }) => { const result = await searchMyBackend(query, numResults, key!, signal); return { results: result.results }; }, }, }; ``` The registry handles dispatching, key resolution, formatting labels, and setup menu — no other edits needed. ## License MIT ---

Proudly created with pi

================================================
FILE: backends/parsers.test.ts
================================================
import { describe, it, expect } from "vitest";
import {
  parseMarginalia,
  parseWebSearchAPI,
  parseSerper,
  parseTavily,
  parseExa,
  parseBrave,
  parseLangSearch,
  parseFirecrawl,
  parsePerplexity,
  parseSearXNG,
  parseJina,
} from "./parsers.js";

// Unit tests for the pure response parsers. Every case feeds a hand-built
// payload shaped like the backend's JSON and checks the normalized output.

// --- Marginalia: { results: [{ title, url, description }] } ---
describe("parseMarginalia", () => {
  it("parses standard response", () => {
    const parsed = parseMarginalia(
      {
        results: [
          { title: "Test 1", url: "https://example.com/1", description: "Desc 1" },
          { title: "Test 2", url: "https://example.com/2", description: "Desc 2" },
        ],
      },
      10,
    );
    expect(parsed).toHaveLength(2);
    expect(parsed[0]).toEqual({ title: "Test 1", url: "https://example.com/1", snippet: "Desc 1" });
  });

  it("handles missing fields gracefully", () => {
    const parsed = parseMarginalia({ results: [{}] }, 10);
    expect(parsed).toHaveLength(1);
    expect(parsed[0]).toEqual({ title: "", url: "", snippet: "" });
  });

  it("truncates long descriptions to 500 chars", () => {
    const longDescription = "x".repeat(600);
    const parsed = parseMarginalia({ results: [{ description: longDescription }] }, 10);
    expect(parsed[0].snippet.length).toBe(500);
  });

  it("respects numResults limit", () => {
    const tenEntries = Array.from({ length: 10 }, (_, i) => ({ title: `T${i}`, url: `https://e.com/${i}` }));
    const parsed = parseMarginalia({ results: tenEntries }, 3);
    expect(parsed).toHaveLength(3);
  });

  it("handles empty results", () => {
    expect(parseMarginalia({}, 10)).toHaveLength(0);
  });
});

// --- WebSearchAPI: { organic: [{ title, url, description }] } ---
describe("parseWebSearchAPI", () => {
  it("parses organic results", () => {
    const payload = {
      organic: [{ title: "Web 1", url: "https://web.com/1", description: "Web desc" }],
    };
    const parsed = parseWebSearchAPI(payload, 10);
    expect(parsed).toHaveLength(1);
    expect(parsed[0]).toEqual({ title: "Web 1", url: "https://web.com/1", snippet: "Web desc" });
  });

  it("handles missing organic field", () => {
    expect(parseWebSearchAPI({}, 10)).toHaveLength(0);
  });

  it("handles organic as non-array", () => {
    expect(parseWebSearchAPI({ organic: "not an array" }, 10)).toHaveLength(0);
  });
});

// --- Serper: { organic: [{ title, link, snippet }] } ---
describe("parseSerper", () => {
  it("maps link to url", () => {
    const payload = { organic: [{ title: "S", link: "https://s.com", snippet: "snip" }] };
    const [first] = parseSerper(payload, 10);
    expect(first).toEqual({ title: "S", url: "https://s.com", snippet: "snip" });
  });
});

// --- Tavily: { results: [{ title, url, content }] } ---
describe("parseTavily", () => {
  it("maps content to snippet and preserves content field", () => {
    const payload = { results: [{ title: "T", url: "https://t.com", content: "full content" }] };
    const [first] = parseTavily(payload, 10);
    expect(first.snippet).toBe("full content");
    expect(first.content).toBe("full content");
  });
});

// --- Exa: { results: [{ title, url, text, highlight }] } ---
describe("parseExa", () => {
  it("prefers text over highlight for snippet", () => {
    const payload = {
      results: [{ title: "E", url: "https://e.com", text: "text val", highlight: "high val" }],
    };
    expect(parseExa(payload, 10)[0].snippet).toBe("text val");
  });

  it("falls back to highlight when no text", () => {
    const payload = { results: [{ title: "E", url: "https://e.com", highlight: "high val" }] };
    expect(parseExa(payload, 10)[0].snippet).toBe("high val");
  });
});

// --- Brave: { web: { results: [{ title, url, description }] } } ---
describe("parseBrave", () => {
  it("navigates web.results path", () => {
    const payload = { web: { results: [{ title: "B", url: "https://b.com", description: "desc" }] } };
    const [first] = parseBrave(payload, 10);
    expect(first).toEqual({ title: "B", url: "https://b.com", snippet: "desc" });
  });

  it("returns empty when web is missing", () => {
    const parsed = parseBrave({}, 10);
    expect(parsed).toHaveLength(0);
  });
});

// --- LangSearch: { data: { webPages: { value: [...] } } } ---
describe("parseLangSearch", () => {
  it("navigates data.webPages.value path", () => {
    const payload = {
      data: { webPages: { value: [{ name: "LS", url: "https://ls.com", snippet: "sn" }] } },
    };
    const [first] = parseLangSearch(payload, 10);
    expect(first.title).toBe("LS");
    expect(first.snippet).toBe("sn");
  });

  it("prefers name over title", () => {
    const payload = {
      data: { webPages: { value: [{ name: "Name", title: "Title", url: "https://ls.com" }] } },
    };
    expect(parseLangSearch(payload, 10)[0].title).toBe("Name");
  });
});

// --- Firecrawl v2 (with v1 fallback) ---
describe("parseFirecrawl", () => {
  it("parses v2 object response with web array", () => {
    const payload = { data: { web: [{ title: "FC", url: "https://fc.com", description: "d" }] } };
    const [first] = parseFirecrawl(payload, 10);
    expect(first).toEqual({ title: "FC", url: "https://fc.com", snippet: "d" });
  });

  it("parses v2 flat array response", () => {
    const payload = { data: [{ title: "FC", url: "https://fc.com" }] };
    expect(parseFirecrawl(payload, 10)).toHaveLength(1);
  });

  it("falls back to v1 results field", () => {
    const payload = { results: [{ title: "FC1", url: "https://fc.com/1" }] };
    expect(parseFirecrawl(payload, 10)).toHaveLength(1);
  });

  it("falls back to images when web is empty", () => {
    const payload = { data: { web: [], images: [{ title: "Img", url: "https://img.com" }] } };
    expect(parseFirecrawl(payload, 10)[0].title).toBe("Img");
  });
});

// --- Perplexity: { citations: string[], choices: [{ message: { content } }] } ---
describe("parsePerplexity", () => {
  it("builds answer result from content + citations", () => {
    const payload = {
      citations: ["https://src1.com", "https://src2.com"],
      choices: [{ message: { content: "The answer is 42" } }],
    };
    const parsed = parsePerplexity(payload, "what is the answer", 10);
    expect(parsed[0].title).toBe("Answer: what is the answer");
    expect(parsed[0].snippet).toBe("The answer is 42");
    expect(parsed).toHaveLength(3); // answer + 2 citations
  });

  it("extracts hostname as title from citation URLs", () => {
    const payload = { citations: ["https://www.example.com/path/to/page"] };
    const [first] = parsePerplexity(payload, "test", 10);
    expect(first.title).toBe("example.com/path/to/page");
  });

  it("handles empty citations", () => {
    expect(parsePerplexity({ citations: [] }, "test", 10)).toHaveLength(0);
  });
});

// --- SearXNG: { results: [{ title, url, content, snippet }] } ---
describe("parseSearXNG", () => {
  it("prefers content over snippet", () => {
    const payload = {
      results: [{ title: "SX", url: "https://sx.com", content: "content", snippet: "snip" }],
    };
    expect(parseSearXNG(payload, 10)[0].snippet).toBe("content");
  });
});

// --- Jina: { data: [{ title, url, content, description }] } ---
describe("parseJina", () => {
  it("parses data array with content", () => {
    const payload = { data: [{ title: "J", url: "https://j.com", content: "full article" }] };
    const [first] = parseJina(payload, 10);
    expect(first.title).toBe("J");
    expect(first.content).toBe("full article");
    expect(first.snippet).toBe("full article");
  });

  it("truncates content to 2000 chars", () => {
    const payload = { data: [{ title: "J", url: "https://j.com", content: "x".repeat(3000) }] };
    expect(parseJina(payload, 10)[0].content.length).toBe(2000);
  });
});

================================================
FILE: backends/parsers.ts
================================================
/**
 * Pure response parsers for search backends.
 * Each takes raw JSON data and returns normalized results.
 * No HTTP, no side effects — easy to unit test.
*/ export interface ParsedResult { title: string; url: string; snippet: string; } // --------------------------------------------------------------------------- // Marginalia Search // Response: { results: [{ title, url, description }] } // --------------------------------------------------------------------------- export function parseMarginalia( data: Record, numResults: number, ): ParsedResult[] { const results = (data.results || []) as Array>; return results.slice(0, numResults).map((r) => ({ title: (r.title as string) || "", url: (r.url as string) || "", snippet: ((r.description as string) || "").slice(0, 500), })); } // --------------------------------------------------------------------------- // WebSearchAPI.ai // Response: { organic: [{ title, url, description }] } // --------------------------------------------------------------------------- export function parseWebSearchAPI( data: Record, numResults: number, ): ParsedResult[] { const rawResults = data.organic; const organic = Array.isArray(rawResults) ? rawResults : []; return organic.slice(0, numResults).map((r) => ({ title: (r.title as string) || "", url: (r.url as string) || "", snippet: ((r.description as string) || "").slice(0, 500), })); } // --------------------------------------------------------------------------- // Serper.dev (Google) // Response: { organic: [{ title, link, snippet }] } // --------------------------------------------------------------------------- export function parseSerper( data: Record, numResults: number, ): ParsedResult[] { const rawResults = data.organic; const results = Array.isArray(rawResults) ? 
rawResults : []; return results.slice(0, numResults).map((r) => ({ title: (r.title as string) || "", url: (r.link as string) || "", snippet: (r.snippet as string) || "", })); } // --------------------------------------------------------------------------- // Tavily // Response: { results: [{ title, url, content }] } // --------------------------------------------------------------------------- export interface TavilyParsedResult extends ParsedResult { content?: string; } export function parseTavily( data: Record, numResults: number, ): TavilyParsedResult[] { const rawResults = data.results; const results = Array.isArray(rawResults) ? rawResults : []; return results.slice(0, numResults).map((r) => ({ title: (r.title as string) || "", url: (r.url as string) || "", snippet: (r.content as string) || "", content: r.content as string, })); } // --------------------------------------------------------------------------- // Exa // Response: { results: [{ title, url, text, highlight }] } // --------------------------------------------------------------------------- export function parseExa( data: Record, numResults: number, ): ParsedResult[] { const rawResults = data.results; const results = Array.isArray(rawResults) ? rawResults : []; return results.slice(0, numResults).map((r) => ({ title: (r.title as string) || "", url: (r.url as string) || "", snippet: ((r.text as string) || (r.highlight as string) || "").slice(0, 500), })); } // --------------------------------------------------------------------------- // Brave Search // Response: { web: { results: [{ title, url, description }] } } // --------------------------------------------------------------------------- export function parseBrave( data: Record, numResults: number, ): ParsedResult[] { const web = data.web; if (!web || typeof web !== "object") { return []; } const rawResults = (web as Record).results; const results = Array.isArray(rawResults) ? 
rawResults : []; return results.slice(0, numResults).map((r) => ({ title: (r.title as string) || "", url: (r.url as string) || "", snippet: ((r.description as string) || "").slice(0, 500), })); } // --------------------------------------------------------------------------- // LangSearch // Response: { data: { webPages: { value: [{ name, url, snippet, description }] } } } // --------------------------------------------------------------------------- export function parseLangSearch( data: Record, numResults: number, ): ParsedResult[] { const pages = (data.data as Record)?.webPages as Record | undefined; const results = (pages?.value || data.results || data.data || []) as Array>; return results.slice(0, numResults).map((r) => ({ title: (r.name as string) || (r.title as string) || "", url: (r.url as string) || (r.link as string) || "", snippet: ((r.snippet as string) || (r.description as string) || "").slice(0, 500), })); } // --------------------------------------------------------------------------- // Firecrawl v2 // Response: { data: { web: [...] } or data: [...] or { results: [...] } (v1 fallback) // --------------------------------------------------------------------------- export function parseFirecrawl( data: Record, numResults: number, ): ParsedResult[] { const rawData = data.data; let results: Array> = []; if (Array.isArray(rawData)) { results = rawData; } else if (typeof rawData === "object" && rawData !== null) { const obj = rawData as Record; results = Array.isArray(obj.web) ? 
obj.web : []; if (results.length === 0) { if (Array.isArray(obj.images)) results = obj.images as Array>; else if (Array.isArray(obj.news)) results = obj.news as Array>; } } else if (Array.isArray(data.results)) { results = data.results; } return results.slice(0, numResults).map((r) => ({ title: (r.title as string) || "", url: (r.url as string) || "", snippet: ((r.description as string) || (r.snippet as string) || "").slice(0, 500), })); } // --------------------------------------------------------------------------- // Perplexity Sonar // Response: { citations: string[], choices: [{ message: { content } }] } // --------------------------------------------------------------------------- export function parsePerplexity( data: Record, query: string, numResults: number, ): ParsedResult[] { const citations = (data.citations as string[]) || []; const message = (data.choices as Array>)?.[0]?.message as Record | undefined; const answerText = (message?.content as string) || ""; const results: ParsedResult[] = []; if (answerText) { results.push({ title: `Answer: ${query}`, url: citations[0] || "", snippet: answerText.slice(0, 500), }); } for (const url of citations) { try { const u = new URL(url); const title = u.hostname.replace(/^www\./, "") + (u.pathname !== "/" ? u.pathname.slice(0, 60) : ""); results.push({ title: title || url, url, snippet: "" }); } catch { results.push({ title: url, url, snippet: "" }); } } return results.slice(0, numResults); } // --------------------------------------------------------------------------- // SearXNG // Response: { results: [{ title, url, content, snippet }] } // --------------------------------------------------------------------------- export function parseSearXNG( data: Record, numResults: number, ): ParsedResult[] { const rawResults = data.results as Array> | undefined; const results = Array.isArray(rawResults) ? 
rawResults : []; return results.slice(0, numResults).map((r) => ({ title: (r.title as string) || "", url: (r.url as string) || "", snippet: ((r.content as string) || (r.snippet as string) || "").slice(0, 500), })); } // --------------------------------------------------------------------------- // Jina AI (s.jina.ai) // Response: { data: [{ title, url, content, description }] } // --------------------------------------------------------------------------- export interface JinaParsedResult extends ParsedResult { content: string; } export function parseJina( data: Record, numResults: number, ): JinaParsedResult[] { const rawData = data.data as Array> | undefined; const results = Array.isArray(rawData) ? rawData : []; return results.slice(0, numResults).map((r) => ({ title: (r.title as string) || "", url: (r.url as string) || "", content: ((r.content as string) || (r.description as string) || "").slice(0, 2000), snippet: ((r.content as string) || (r.description as string) || "").slice(0, 500), })); } ================================================ FILE: extensions/search-hub.ts ================================================ /** * Extension — Unified web search (12 backends) + content extraction (web_read) * * Backends (choose any, all disabled by default): * duckduckgo — ✅ Free, no key, via Python ddgs lib. Rate-limited. * jina — ✅ Free tier (API key optional for higher rate limits), full markdown via s.jina.ai * marginalia — ✅ Anti-SEO, "public" key optional. 354ms avg * serper — ✅ Google via serper.dev, 2500 free/mo. 667ms * brave — ✅ Brave Search, 2000 free/mo. 460ms * tavily — ✅ AI search, 1000 free/mo. 356ms BEST QUALITY * exa — ✅ AI-native, 10 QPS free tier. 137ms FASTEST * firecrawl — ✅ Search+crawl, 500 free credits. 644ms * langsearch — ✅ Free tier, no CC. 1816ms * websearchapi — ✅ Google-powered, 2000 free credits. 1323ms * perplexity — ✅ Unlimited free Sonar, citation-based answers * searxng — ✅ Self-hosted, 70+ aggregators. 
Needs instance URL * * Tools: web_search (auto-fallback + RRF combine mode), web_read (URL content) * Config: ~/.pi/agent/extensions/search.json + .pi/search.json (project wins) * Credentials: env var refs (ALL_CAPS), shell commands (!command), or literal keys * * Example .pi/search.json: * { * "defaultBackend": "auto", * "backends": { * "duckduckgo": { "enabled": true }, * "marginalia": { "enabled": true }, * "serper": { "enabled": true, "apiKey": "..." }, * "tavily": { "enabled": true, "apiKey": "..." }, * "exa": { "enabled": true, "apiKey": "..." }, * "firecrawl": { "enabled": true, "apiKey": "..." }, * "langsearch": { "enabled": true, "apiKey": "..." }, * "websearchapi": { "enabled": true, "apiKey": "..." }, * "perplexity": { "enabled": true, "apiKey": "..." }, * "searxng": { "enabled": true, "instanceUrl": "http://localhost:8888" } * } * } */ import { execSync, spawn } from "node:child_process"; import { existsSync, readFileSync, writeFileSync, mkdirSync } from "node:fs"; import { join } from "node:path"; import type { ExtensionAPI } from "@earendil-works/pi-coding-agent"; import { StringEnum } from "@earendil-works/pi-ai"; import { Type } from "typebox"; import { parseMarginalia, parseWebSearchAPI, parseSerper, parseTavily, parseExa, parseBrave, parseLangSearch, parseFirecrawl, parsePerplexity, parseSearXNG, parseJina, } from "../backends/parsers.js"; // --------------------------------------------------------------------------- // Types & Config // --------------------------------------------------------------------------- interface BackendConfig { enabled?: boolean; apiKey?: string; /** SearXNG-specific: base URL of the self-hosted instance (e.g. http://localhost:8888) */ instanceUrl?: string; /** Perplexity-specific: model variant (sonar, sonar-pro, sonar-deep-research, sonar-reasoning). 
/**
 * Turn a credential reference from the config into the value it stands for.
 *
 * Resolution rules, in the order they are applied:
 *   - empty / missing reference → `undefined`
 *   - `"!command"`              → run the command through the shell and use its trimmed
 *                                 stdout (empty output counts as `undefined`); both the
 *                                 value and a failure are memoized in `commandValueCache`
 *   - name of a set env var     → that variable's value
 *   - ALL_CAPS name, var unset  → warn and return `undefined`
 *   - anything else             → the reference itself, i.e. a literal key
 *
 * @param reference - Raw `apiKey` string from the config.
 * @returns The resolved secret, or `undefined` when nothing could be resolved.
 * @throws When the shell command fails; later calls rethrow the memoized message.
 */
function resolveConfigValue(reference: string | undefined): string | undefined {
  if (!reference) {
    return undefined;
  }

  if (!reference.startsWith("!")) {
    const fromEnv = process.env[reference];
    if (fromEnv !== undefined) {
      return fromEnv;
    }

    const looksLikeEnvName = /^[A-Z][A-Z0-9_]*$/.test(reference);
    if (!looksLikeEnvName) {
      // Not a command, not an env var name: the string is the key itself.
      return reference;
    }

    // Shaped like an env var reference, but the variable is not set.
    console.warn(`[pi-search] Credential reference "${reference}" matches ALL_CAPS env-var pattern ` +
      `but process.env.${reference} is not set. If this is a literal key, ` +
      `use a different name to avoid confusion.`);
    return undefined;
  }

  // Shell command reference: serve from the memo when this command already ran.
  const memo = commandValueCache.get(reference);
  if (memo) {
    if (memo.errorMessage) {
      throw new Error(memo.errorMessage);
    }
    return memo.value;
  }

  let stdout: string;
  try {
    stdout = execSync(reference.slice(1), {
      encoding: "utf-8",
      stdio: ["ignore", "pipe", "pipe"],
      timeout: COMMAND_TIMEOUT_MS,
    }).trim();
  } catch (error) {
    // Remember the failure so a broken command is not re-run on every lookup.
    commandValueCache.set(reference, { errorMessage: (error as Error).message });
    throw error;
  }

  const value = stdout.length > 0 ? stdout : undefined;
  commandValueCache.set(reference, { value });
  return value;
}
/**
 * Build the effective search configuration for a working directory.
 *
 * Layers, lowest to highest precedence:
 *   1. built-in defaults (`defaultBackend: "duckduckgo"`, no backends),
 *   2. the global file `<agent dir>/extensions/search.json`,
 *   3. the project file `<cwd>/.pi/search.json`.
 *
 * Top-level keys of a higher layer replace those of a lower one, except `backends`,
 * which is merged per backend AND per field: a project entry such as
 * `{ "serper": { "enabled": false } }` keeps the `apiKey` defined globally for
 * `serper`, and global backends the project does not mention are preserved.
 *
 * Finally, every backend whose convenience env var (FALLBACK_ENV_MAP) holds a
 * non-blank value is enabled, unless its config already sets `enabled` explicitly.
 *
 * Unreadable or malformed config files are ignored (best effort).
 *
 * @param cwd - Directory whose `.pi/search.json` supplies the project overrides.
 * @returns The merged configuration.
 */
function loadConfig(cwd: string): SearchConfig {
  const globalPath = join(getAgentDir(), "extensions", "search.json");
  const projectPath = join(cwd, ".pi", "search.json");

  let config: SearchConfig = { defaultBackend: "duckduckgo", backends: {} };

  if (existsSync(globalPath)) {
    try {
      config = { ...config, ...JSON.parse(readFileSync(globalPath, "utf-8")) };
    } catch {
      // Best effort: a broken global file behaves like a missing one.
    }
  }

  // Snapshot of the global backends, taken before the project layer replaces `backends`.
  const globalBackends = { ...(config.backends ?? {}) } as Record<string, BackendConfig>;

  if (existsSync(projectPath)) {
    try {
      const project = JSON.parse(readFileSync(projectPath, "utf-8"));
      config = { ...config, ...project };

      // A project file that nulls `backends` must not wipe the global ones.
      if (config.backends == null) {
        config.backends = globalBackends as SearchConfig["backends"];
      }

      if (project.backends && typeof project.backends === "object") {
        // Start from the GLOBAL entries only. Seeding this map with the project
        // entries as well would make the per-field merge below a no-op, because
        // each project entry would then be merged with itself.
        const merged: Record<string, BackendConfig> = { ...globalBackends };
        for (const [name, override] of Object.entries(
          project.backends as Record<string, BackendConfig>,
        )) {
          const inherited = globalBackends[name];
          merged[name] = override && inherited ? { ...inherited, ...override } : override;
        }
        config.backends = merged as SearchConfig["backends"];
      }
    } catch {
      // Best effort: a broken project file leaves the global configuration in place.
    }
  }

  // Auto-enable backends that have a convenience env var but no explicit choice yet
  // (`enabled: false` is an explicit choice and is respected).
  for (const [backend, envVar] of Object.entries(FALLBACK_ENV_MAP)) {
    const envValue = process.env[envVar];
    if (envValue && envValue.trim().length > 0) {
      const configBackends = (config.backends ?? {}) as Record<string, BackendConfig | undefined>;
      const existing = configBackends[backend];
      if (!existing || existing.enabled === undefined) {
        if (!config.backends) config.backends = {};
        (config.backends as Record<string, BackendConfig>)[backend] = {
          ...existing,
          enabled: true,
        };
      }
    }
  }

  return config;
}
/**
 * Build a short, log-safe description of a failed API response.
 *
 * The body is scrubbed of anything that looks like a credential and capped at
 * 300 characters before being embedded in the message.
 *
 * @param status - HTTP status code of the response.
 * @param text - Response body, or a detail extracted from it. Declared as a string,
 *   but callers may forward a field of a parsed JSON error body, so any other
 *   runtime value is serialized instead of crashing the error path.
 * @returns `API error (<status>): <sanitized text>`.
 */
function sanitizeError(status: number, text: string): string {
  const raw: unknown = text;
  let body: string;
  if (typeof raw === "string") {
    body = raw;
  } else if (raw == null) {
    body = "";
  } else {
    try {
      // JSON.stringify yields undefined for functions/symbols, hence the fallback.
      body = JSON.stringify(raw) ?? String(raw);
    } catch {
      // e.g. circular structures
      body = String(raw);
    }
  }

  const safe = body
    // Redact "Bearer <token>" and "Token <token>" patterns
    .replace(/(bearer|token)\s+[\w.\/-]{8,}/gi, "$1 [redacted]")
    // Redact key=value or "key": "value" pairs for known secret keys
    .replace(/(api[-_]?key|bearer|token|authorization|secret|password)["']?\s*[:=]\s*["']?[\w.\/-]{8,}/gi, "[redacted]")
    // Redact JSON key-value pairs where the value looks like a key
    .replace(/"(?:api[-_]?key|apiKey|token|secret|password|bearer)"\s*:\s*"[^"']{8,}"/gi, '"[redacted]"')
    // Redact x-api-key / Authorization header values in raw text
    .replace(/(x-api-key|authorization)\s*:\s*[\w.\/-]{8,}/gi, "$1: [redacted]")
    .slice(0, 300);

  return `API error (${status}): ${safe}`;
}
import json, sys try: from ddgs import DDGS except ImportError: # ddgs may be installed as a uv tool — find it and add to sys.path import subprocess, pathlib try: ddgs_bin = subprocess.check_output(["which", "ddgs"], text=True, stderr=subprocess.DEVNULL).strip() if ddgs_bin: # Walk up from the binary until we find site-packages — no hardcoded depth assumption ddgs_path = pathlib.Path(ddgs_bin).resolve() found = False for parent in [ddgs_path, *ddgs_path.parents]: for py_ver_dir in sorted((parent / "lib").iterdir(), reverse=True): sp = py_ver_dir / "site-packages" if sp.is_dir(): sys.path.insert(0, str(sp)) found = True break if found: break if not found: sys.exit(1) except Exception: sys.exit(1) from ddgs import DDGS results = [] with DDGS() as ddgs: for i, r in enumerate(ddgs.text(${JSON.stringify(query)}, max_results=${numResults})): results.append({"title": r.get("title",""), "url": r.get("href",""), "snippet": r.get("body","")}) print(json.dumps({"results": results})) `; return new Promise((resolve, reject) => { const pythonCmd = process.platform === "win32" ? 
"python" : "python3"; const proc = spawn(pythonCmd, ["-c", pyScript], { stdio: ["pipe", "pipe", "pipe"], }); let stdout = ""; let stderr = ""; proc.stdout.on("data", (data: Buffer) => { stdout += data.toString(); }); proc.stderr.on("data", (data: Buffer) => { stderr += data.toString(); }); // Timeout timer const timeout = setTimeout(() => { proc.kill(); reject(new Error("DuckDuckGo search timed out")); }, HTTP_TIMEOUT_MS); // Abort signal handler const onAbort = () => { clearTimeout(timeout); proc.kill(); reject(new Error("DuckDuckGo search aborted")); }; if (signal) { if (signal.aborted) { clearTimeout(timeout); reject(new Error("DuckDuckGo search aborted")); return; } signal.addEventListener("abort", onAbort, { once: true }); } proc.on("close", (code) => { clearTimeout(timeout); if (signal) signal.removeEventListener("abort", onAbort); if (code === 0) { try { resolve(JSON.parse(stdout.trim())); } catch { reject(new Error(`DuckDuckGo search: invalid JSON output: ${stdout.slice(0, 200)}`)); } } else { const msg = stderr.trim().slice(0, 300); reject(new Error(`DuckDuckGo search failed (exit ${code}): ${msg || "unknown error"}`)); } }); proc.on("error", (err) => { clearTimeout(timeout); if (signal) signal.removeEventListener("abort", onAbort); reject(new Error(`DuckDuckGo search failed: ${err.message}`)); }); }); } // --------------------------------------------------------------------------- // Backend: Marginalia Search (anti-SEO independent search, uses "public" key) // --------------------------------------------------------------------------- async function searchMarginalia( query: string, numResults: number, apiKey: string | undefined, signal?: AbortSignal, ): Promise<{ results: Array<{ title: string; url: string; snippet: string }> }> { const key = apiKey || "public"; const response = await fetch( `https://api.marginalia.nu/${encodeURIComponent(key)}/search/${encodeURIComponent(query)}?index=0&count=${Math.min(numResults, 50)}`, { signal: 
/**
 * Run a search through the Serper.dev endpoint (`POST https://google.serper.dev/search`).
 *
 * @param query - Search terms, sent as the `q` field of the JSON body.
 * @param numResults - Desired result count; the request asks for at most 100.
 * @param apiKey - Serper key, sent in the `X-API-KEY` header.
 * @param signal - Optional caller abort signal, passed through `timeoutSignal`.
 * @returns The results produced by `parseSerper` from the response body.
 * @throws Error starting with "Serper" when the response status is not OK.
 */
async function searchSerper(
  query: string,
  numResults: number,
  apiKey: string,
  signal?: AbortSignal,
): Promise<{ results: Array<{ title: string; url: string; snippet: string }> }> {
  const response = await fetch("https://google.serper.dev/search", {
    method: "POST",
    headers: {
      "X-API-KEY": apiKey,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({ q: query, num: Math.min(numResults, 100) }),
    signal: timeoutSignal(signal),
  });

  if (!response.ok) {
    // The body is only used for diagnostics; failing to read it must not mask the status.
    const text = await response.text().catch(() => "");
    throw new Error(`Serper ${sanitizeError(response.status, text)}`);
  }

  const data = (await response.json()) as Record<string, unknown>;
  return { results: parseSerper(data, numResults) };
}
/**
 * Run a search through the Exa endpoint (`POST https://api.exa.ai/search`),
 * requesting page text and highlights along with the hits.
 *
 * @param query - Search terms.
 * @param numResults - Desired result count; the request asks for at most 25.
 * @param apiKey - Exa key, sent in the `x-api-key` header.
 * @param signal - Optional caller abort signal, passed through `timeoutSignal`.
 * @returns The results produced by `parseExa` from the response body.
 * @throws Error starting with "Exa" when the response status is not OK. The detail is
 *   the `error` or `message` field of a JSON error body when present, else the raw body.
 */
async function searchExa(
  query: string,
  numResults: number,
  apiKey: string,
  signal?: AbortSignal,
): Promise<{ results: Array<{ title: string; url: string; snippet?: string }> }> {
  const body = {
    query,
    numResults: Math.min(numResults, 25),
    contents: { text: true, highlights: true },
  };

  const response = await fetch("https://api.exa.ai/search", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "x-api-key": apiKey,
    },
    body: JSON.stringify(body),
    signal: timeoutSignal(signal),
  });

  if (!response.ok) {
    const text = await response.text().catch(() => "");
    let detail = text;
    try {
      const parsed: unknown = JSON.parse(text);
      if (parsed !== null && typeof parsed === "object") {
        const { error, message } = parsed as { error?: unknown; message?: unknown };
        const picked = error || message;
        if (typeof picked === "string") {
          detail = picked;
        } else if (picked) {
          // Structured error objects must be serialized: sanitizeError works on
          // strings, and handing it an object would throw and hide the API error.
          detail = JSON.stringify(picked);
        }
      }
    } catch {
      // Not JSON: keep the raw body.
    }
    throw new Error(`Exa ${sanitizeError(response.status, detail)}`);
  }

  const data = (await response.json()) as Record<string, unknown>;
  return { results: parseExa(data, numResults) };
}
/**
 * Run a search through the LangSearch endpoint
 * (`POST https://api.langsearch.com/v1/web-search`).
 *
 * @param query - Search terms.
 * @param numResults - Desired result count; the request asks for at most 20.
 * @param apiKey - LangSearch key, sent as a bearer token.
 * @param signal - Optional caller abort signal, passed through `timeoutSignal`.
 * @returns The results produced by `parseLangSearch` from the response body.
 * @throws Error starting with "LangSearch" when the response status is not OK.
 */
async function searchLangSearch(
  query: string,
  numResults: number,
  apiKey: string,
  signal?: AbortSignal,
): Promise<{ results: Array<{ title: string; url: string; snippet?: string }> }> {
  const response = await fetch("https://api.langsearch.com/v1/web-search", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "Authorization": `Bearer ${apiKey}`,
    },
    body: JSON.stringify({ query, max_results: Math.min(numResults, 20) }),
    signal: timeoutSignal(signal),
  });

  if (!response.ok) {
    // The body is only used for diagnostics; failing to read it must not mask the status.
    const text = await response.text().catch(() => "");
    throw new Error(`LangSearch ${sanitizeError(response.status, text)}`);
  }

  const data = (await response.json()) as Record<string, unknown>;
  return { results: parseLangSearch(data, numResults) };
}
/**
 * Ask Perplexity (`POST https://api.perplexity.ai/chat/completions`) and turn the
 * answer into search results via `parsePerplexity`.
 *
 * The query is sent as a single user message with `search_context_size: "high"`.
 *
 * @param query - Search terms, used as the user message and handed to the parser.
 * @param numResults - Maximum number of results, applied by the parser only.
 * @param apiKey - Perplexity key, sent as a bearer token.
 * @param signal - Optional caller abort signal, passed through `timeoutSignal`.
 * @param model - Model name; falls back to `"sonar"` when empty or omitted.
 * @returns The results produced by `parsePerplexity` from the response body.
 * @throws Error starting with "Perplexity" when the response status is not OK.
 */
async function searchPerplexity(
  query: string,
  numResults: number,
  apiKey: string,
  signal?: AbortSignal,
  model?: string,
): Promise<{ results: Array<{ title: string; url: string; snippet?: string }> }> {
  const body = {
    model: model || "sonar",
    messages: [{ role: "user", content: query }],
    search_context_size: "high",
  };

  const response = await fetch("https://api.perplexity.ai/chat/completions", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "Authorization": `Bearer ${apiKey}`,
    },
    body: JSON.stringify(body),
    signal: timeoutSignal(signal),
  });

  if (!response.ok) {
    // The body is only used for diagnostics; failing to read it must not mask the status.
    const text = await response.text().catch(() => "");
    throw new Error(`Perplexity ${sanitizeError(response.status, text)}`);
  }

  const data = (await response.json()) as Record<string, unknown>;
  return { results: parsePerplexity(data, query, numResults) };
}
/**
 * Run a search through Jina (`GET https://s.jina.ai/?q=<query>&format=json`).
 *
 * @param query - Search terms, URL-encoded into the `q` parameter.
 * @param numResults - Maximum number of results; not sent to the API, only applied
 *   by `parseJina` when trimming the response.
 * @param apiKey - Optional key; when given it is sent as a bearer token, otherwise
 *   no `Authorization` header is set.
 * @param signal - Optional caller abort signal, passed through `timeoutSignal`.
 * @returns The results produced by `parseJina` from the response body.
 * @throws Error starting with "Jina AI" when the response status is not OK.
 */
async function searchJina(
  query: string,
  numResults: number,
  apiKey?: string,
  signal?: AbortSignal,
): Promise<{ results: JinaResult[] }> {
  const headers: Record<string, string> = { "Accept": "application/json" };
  if (apiKey) {
    headers["Authorization"] = `Bearer ${apiKey}`;
  }

  const response = await fetch(`https://s.jina.ai/?q=${encodeURIComponent(query)}&format=json`, {
    signal: timeoutSignal(signal),
    headers,
  });

  if (!response.ok) {
    // The body is only used for diagnostics; failing to read it must not mask the status.
    const text = await response.text().catch(() => "");
    throw new Error(`Jina AI ${sanitizeError(response.status, text)}`);
  }

  const data = (await response.json()) as Record<string, unknown>;
  return { results: parseJina(data, numResults) };
}
needsKey: boolean; needsKeyFromConfig: boolean; optionalKey: boolean; needsInstanceUrl: boolean; label: string; setupLabel: string | null; search: (query: string, numResults: number, deps: { key?: string; instanceUrl?: string; signal?: AbortSignal }) => Promise<{ results: Array<{ title: string; url: string; snippet?: string; content?: string }> }>; } const BACKEND_DEFS: Record = { duckduckgo: { needsKey: false, needsKeyFromConfig: false, optionalKey: false, needsInstanceUrl: false, label: "DuckDuckGo", setupLabel: null, search: async (query, numResults, { signal }) => { const ddg = await searchDuckDuckGo(query, numResults, signal); return { results: ddg.results }; }, }, jina: { needsKey: false, needsKeyFromConfig: false, optionalKey: true, needsInstanceUrl: false, label: "Jina AI", setupLabel: "Jina AI (free tier, API key optional for higher rate limits)", search: async (query, numResults, { key, signal }) => { return await searchJina(query, numResults, key, signal); }, }, marginalia: { needsKey: false, needsKeyFromConfig: true, optionalKey: false, needsInstanceUrl: false, label: "Marginalia", setupLabel: null, search: async (query, numResults, { key, signal }) => { const marg = await searchMarginalia(query, numResults, key, signal); return { results: marg.results }; }, }, serper: { needsKey: true, needsKeyFromConfig: false, optionalKey: false, needsInstanceUrl: false, label: "Serper", setupLabel: "Serper (Google — 2500 free queries, one-time)", search: async (query, numResults, { key, signal }) => { const serp = await searchSerper(query, numResults, key!, signal); return { results: serp.results }; }, }, tavily: { needsKey: true, needsKeyFromConfig: false, optionalKey: false, needsInstanceUrl: false, label: "Tavily", setupLabel: "Tavily (AI agent search — 1000 free calls/month)", search: async (query, numResults, { key, signal }) => { const tav = await searchTavily(query, numResults, key!, signal); return { results: tav.results }; }, }, exa: { needsKey: true, 
needsKeyFromConfig: false, optionalKey: false, needsInstanceUrl: false, label: "Exa", setupLabel: "Exa (AI search — 1000 free queries/month)", search: async (query, numResults, { key, signal }) => { const exa = await searchExa(query, numResults, key!, signal); return { results: exa.results }; }, }, brave: { needsKey: true, needsKeyFromConfig: false, optionalKey: false, needsInstanceUrl: false, label: "Brave", setupLabel: "Brave Search (metered billing ~$5/mo credit)", search: async (query, numResults, { key, signal }) => { const br = await searchBrave(query, numResults, key!, signal); return { results: br.results }; }, }, langsearch: { needsKey: true, needsKeyFromConfig: false, optionalKey: false, needsInstanceUrl: false, label: "LangSearch", setupLabel: "LangSearch (genuinely free, no CC)", search: async (query, numResults, { key, signal }) => { const ls = await searchLangSearch(query, numResults, key!, signal); return { results: ls.results }; }, }, firecrawl: { needsKey: true, needsKeyFromConfig: false, optionalKey: false, needsInstanceUrl: false, label: "Firecrawl", setupLabel: "Firecrawl (500 free credits)", search: async (query, numResults, { key, signal }) => { const fc = await searchFirecrawl(query, numResults, key!, signal); return { results: fc.results }; }, }, websearchapi: { needsKey: true, needsKeyFromConfig: false, optionalKey: false, needsInstanceUrl: false, label: "WebSearchAPI", setupLabel: "WebSearchAPI.ai (2000 free credits)", search: async (query, numResults, { key, signal }) => { const ws = await searchWebSearchAPI(query, numResults, key!, signal); return { results: ws.results }; }, }, perplexity: { needsKey: true, needsKeyFromConfig: false, optionalKey: false, needsInstanceUrl: false, label: "Perplexity Sonar", setupLabel: "Perplexity Sonar (paid, usage-based)", search: async (query, numResults, { key, signal }) => { const bc = (config.backends as Record | undefined)?.perplexity; const model = (bc as Record)?.model as string | undefined; const 
/**
 * RRF (Reciprocal Rank Fusion) — rank-based merge across backends.
 * Constant k=60 is standard from the original RRF paper.
 */
const RRF_K = 60;

/**
 * Merge the ranked result lists of several backends into one list.
 *
 * Results are grouped by URL (compared case-insensitively, ignoring one trailing
 * slash). Each occurrence adds `1 / (RRF_K + position)` to the group's score, where
 * `position` is the zero-based index in that backend's list. The entry kept for a
 * group is the first one seen, unless a later occurrence carries `content` and the
 * kept one does not.
 *
 * @param backendResults - One ranked list per backend, best result first.
 * @param numResults - Maximum number of merged results to return.
 * @returns Merged results, highest score first; ties go to the URL reported by more
 *   backends.
 */
function reciprocalRankFusion(
  backendResults: Array<{ backend: string; results: SearchResultWithBackend[] }>,
  numResults: number,
): SearchResultWithBackend[] {
  const urlScores = new Map<
    string,
    { score: number; result: SearchResultWithBackend; seenBackends: Set<string> }
  >();

  for (const { backend, results } of backendResults) {
    results.forEach((r, position) => {
      const normalizedUrl = r.url.replace(/\/$/, "").toLowerCase();

      let entry = urlScores.get(normalizedUrl);
      if (!entry) {
        entry = { score: 0, result: r, seenBackends: new Set<string>() };
        urlScores.set(normalizedUrl, entry);
      }

      entry.score += 1 / (RRF_K + position);
      entry.seenBackends.add(backend);

      // Prefer the variant with the most complete data (full content over snippet only).
      if (r.content && !entry.result.content) {
        entry.result = r;
      }
    });
  }

  return Array.from(urlScores.values())
    .sort((a, b) => {
      const scoreDiff = b.score - a.score;
      return scoreDiff !== 0 ? scoreDiff : b.seenBackends.size - a.seenBackends.size;
    })
    .slice(0, numResults)
    .map((e) => e.result);
}
/**
 * Render the results of a single backend as a markdown report.
 *
 * Layout: a `## Search Results` header with the query, a `Backend … · Results …`
 * line, then per result a `### <n>. <title>` heading, its URL and up to 500
 * characters of `snippet` (or `content` when there is no snippet), followed by
 * `...` when the text was cut.
 *
 * The query, titles and URLs are rendered on one line each: line breaks inside
 * them are replaced by spaces so a value cannot break out of its heading / URL
 * line, and a leading `#` in the query is escaped.
 *
 * @param query - The search query as entered.
 * @param backend - Label of the backend that produced the results.
 * @param results - Results in display order.
 * @returns The report as one newline-joined string.
 */
function formatResults(
  query: string,
  backend: string,
  results: Array<{ title: string; url: string; snippet?: string; content?: string }>,
): string {
  const singleLine = (value: string): string => value.replace(/[\n\r]/g, " ");

  // Escape newlines and markdown heading chars in query to prevent injection
  const safeQuery = singleLine(query).replace(/^#/gm, "\\#");

  const lines: string[] = [
    `## Search Results: "${safeQuery}"`,
    `Backend: ${backend} · Results: ${results.length}`,
    "",
  ];

  results.forEach((r, i) => {
    // Titles and URLs come from remote pages; keep each on its own single line.
    lines.push(`### ${i + 1}. ${singleLine(r.title || "Untitled")}`);
    lines.push(` URL: ${singleLine(String(r.url))}`);

    const displayText = r.snippet || r.content || "";
    if (displayText) {
      const text = displayText.slice(0, 500);
      lines.push(` ${text}${displayText.length > 500 ? "..." : ""}`);
    }
    lines.push("");
  });

  return lines.join("\n");
}
${r.title || "Untitled"}`); if (r.backend) { lines.push(` *Source: ${backendLabel[r.backend] || r.backend}*`); } lines.push(` URL: ${r.url}`); const displayText = r.snippet || r.content || ""; if (displayText) { const text = displayText.slice(0, 500); lines.push(` ${text}${displayText.length > 500 ? "..." : ""}`); } lines.push(""); } return lines.join("\n"); } // --------------------------------------------------------------------------- // Extension // --------------------------------------------------------------------------- /** Module-level config accessible from helper functions like resolveBackendKey(). */ let config: SearchConfig = { defaultBackend: "duckduckgo", backends: {} }; export default function (pi: ExtensionAPI) { let activeBackends: string[] = []; let configCacheTime = 0; const CONFIG_TTL_MS = 10_000; // re-read config at most every 10s function refreshConfig(cwd: string, force = false) { const now = Date.now(); if (!force && now - configCacheTime < CONFIG_TTL_MS) return; config = loadConfig(cwd); configCacheTime = now; activeBackends = Object.entries(config.backends || {}) .filter(([_, bc]) => bc?.enabled) .map(([name]) => name); // Always add duckduckgo if no backends explicitly enabled, since it needs no key if (activeBackends.length === 0) { activeBackends.push("duckduckgo"); } // Honor defaultBackend: put it first in the auto-try order if (config.defaultBackend && activeBackends.includes(config.defaultBackend)) { activeBackends = [ config.defaultBackend, ...activeBackends.filter(b => b !== config.defaultBackend), ]; } else { config.defaultBackend = activeBackends[0]; } // Invalidate credential cache so shell-command keys refresh after config reload clearCredentialCache(); } // ----------------------------------------------------------------------- // Backend dispatcher // ----------------------------------------------------------------------- async function runBackend( backend: string, query: string, numResults: number, signal?: AbortSignal, 
): Promise> { await waitForCooldown(backend); try { const def = BACKEND_DEFS[backend]; if (!def) throw new Error(`Unknown backend: ${backend}`); let key: string | undefined; if (def.needsKeyFromConfig) { const bc = (config.backends as Record | undefined)?.[backend]; key = bc?.apiKey; } else if (def.needsKey) { key = resolveBackendKey(backend); if (!key) { const label = def.label; throw new Error(`${label} backend not configured. ${MISSING_KEY_HELP}`); } } else if (def.optionalKey) { // Optionally resolve key — don't throw if missing key = resolveBackendKey(backend); } let instanceUrl: string | undefined; if (def.needsInstanceUrl) { const bc = (config.backends as Record | undefined)?.[backend]; instanceUrl = bc?.instanceUrl; if (!instanceUrl) { throw new Error(`SearXNG instance URL not configured. Set searxng.instanceUrl in search.json`); } } const result = await def.search(query, numResults, { key, instanceUrl, signal }); return result.results; } finally { markCooldown(backend); } } // ----------------------------------------------------------------------- // Tool: web_search // ----------------------------------------------------------------------- pi.registerTool({ name: "web_search", label: "Web Search", description: "Search the web using one of several backend search engines. " + "Supports DuckDuckGo (free, no key), " + "Marginalia Search (free, shared public key), Serper, Tavily, Exa, Brave, " + "LangSearch, Firecrawl, WebSearchAPI, Perplexity Sonar, and SearXNG (most need API keys). " + "The best available backend is used automatically. " + "Use combine=true to query all enabled backends in parallel for broader coverage. 
" + "Use for fact-finding, research, documentation lookups, and current events.", promptSnippet: "Search the web (supports multiple search backends)", promptGuidelines: [ "Use web_search when you need up-to-date information, facts, or documentation from the web", "Auto mode tries enabled backends in order (DuckDuckGo is the free fallback)", "Set combine=true to query ALL backends in parallel and merge/deduplicate results", "Configure additional backends in .pi/search.json for better quality results", ], parameters: Type.Object({ query: Type.String({ description: "Search query (natural language works best)", }), numResults: Type.Optional( Type.Number({ description: "Number of results (1-20, default 10)", default: 10, }), ), backend: Type.Optional( StringEnum(["duckduckgo", "jina", "marginalia", "serper", "tavily", "exa", "brave", "langsearch", "firecrawl", "websearchapi", "perplexity", "searxng", "auto"] as const, { description: "Backend to use. 'auto' picks the best configured backend (default)", }), ), combine: Type.Optional( Type.Boolean({ description: "When true, queries ALL enabled backends in parallel and merges/deduplicates results. " + "Default is false (fallback mode: uses first successful backend only). " + "Ignored when a specific backend is requested (backend != 'auto').", default: false, }), ), }), async execute(_toolCallId, params, signal, _onUpdate, ctx) { refreshConfig(ctx.cwd); const numResults = Math.max(1, Math.min(params.numResults ?? 10, 20)); const requestedBackend = params.backend || "auto"; const combine = params.combine ?? 
false; if (requestedBackend !== "auto") { // Specific backend requested — try it directly const results = await runBackend(requestedBackend, params.query, numResults, signal); return { content: [{ type: "text", text: formatResults(params.query, requestedBackend, results) }], details: { backend: requestedBackend, resultCount: results.length }, }; } // Auto mode if (combine) { // Combine mode: query all enabled backends in parallel const resultsPerBackend = await Promise.all( activeBackends.map(async (backend) => { try { const results = await runBackend( backend, params.query, Math.ceil(numResults / activeBackends.length), signal, ); return { backend, results: results.map((r) => ({ ...r, backend })) as SearchResultWithBackend[], success: true, }; } catch (err) { return { backend, results: [] as SearchResultWithBackend[], success: false, error: (err as Error).message, }; } }), ); // Build backend stats map const backendStats = new Map< string, { success: boolean; count: number; error?: string } >(); for (const { backend, results, success, error } of resultsPerBackend) { backendStats.set(backend, { success, count: results.length, error, }); } // Merge and re-rank using Reciprocal Rank Fusion const successfulBackends = resultsPerBackend .filter(r => r.success && r.results.length > 0) .map(r => ({ backend: r.backend, results: r.results })); const combined = successfulBackends.length > 0 ? reciprocalRankFusion(successfulBackends, numResults) : []; return { content: [ { type: "text", text: formatCombinedResults(params.query, combined, backendStats), }, ], details: { backend: "combined", resultCount: combined.length, backendStats: Object.fromEntries(backendStats), }, }; } else { // Fallback mode: try each enabled backend in order const errors: string[] = []; for (const backend of activeBackends) { try { const results = await runBackend(backend, params.query, numResults, signal); return { content: [ { type: "text", text: errors.length > 0 ? 
`${errors.join("; ")}\n\n${formatResults(params.query, backend, results)}` : formatResults(params.query, backend, results), }, ], details: { backend: errors.length > 0 ? `${backend} (fallback)` : backend, resultCount: results.length, errors: errors.length > 0 ? errors : undefined, }, }; } catch (err) { errors.push(`${backend}: ${(err as Error).message}`); } } throw new Error(`All backends failed: ${errors.join("; ")}`); } }, }); // ----------------------------------------------------------------------- // Tool: web_read — Read/extract content from a URL // ----------------------------------------------------------------------- pi.registerTool({ name: "web_read", label: "Read Web Page", description: "Fetch a URL as markdown. Use objective for a concrete question, keywords for long pages, " + "rush for speed, smart for better narrowing.", promptSnippet: "Read content from a web page (supports markdown extraction)", promptGuidelines: [ "Use web_read when you need to read the content of a specific URL", "Set objective for a concrete question when only part of the page matters", "Add keywords for long pages when you know the relevant terms", "Choose rush for speed or smart for higher-quality narrowing", ], parameters: Type.Object({ url: Type.String({ description: "HTTP(S) URL or bare domain to fetch", }), fresh: Type.Optional( Type.Boolean({ description: "Bypass cache when freshness matters", }), ), keywords: Type.Optional( Type.Array(Type.String(), { description: "Keyword to focus extraction on relevant sections", }), ), mode: Type.Optional( StringEnum(["rush", "smart"] as const, { description: "rush = faster mode, smart = better section selection on long/noisy pages", }), ), objective: Type.Optional( Type.String({ description: "CSS selector for targeted extraction. 
Use when only part of the page matters.", }), ), }), async execute(_toolCallId, params, signal, _onUpdate, ctx) { refreshConfig(ctx.cwd); const url = params.url.startsWith("https://") || params.url.startsWith("http://") ? params.url : `https://${params.url}`; // Build Jina Reader URL const readerUrl = new URL("https://r.jina.ai/" + url); const headers: Record = { "Accept": "text/plain", }; // Optional Jina API key for higher rate limits (fallback to no-auth) const jinaKey = resolveBackendKey("jina"); if (jinaKey) { headers["Authorization"] = `Bearer ${jinaKey}`; } if (params.fresh) { headers["x-no-cache"] = "true"; } if (params.keywords && params.keywords.length > 0) { headers["x-keywords"] = params.keywords.join(", "); } if (params.mode) { headers["x-respond-with"] = params.mode === "rush" ? "text" : "markdown"; } if (params.objective) { headers["x-target-selector"] = params.objective; } const response = await fetch(readerUrl.toString(), { signal: timeoutSignal(signal), headers, }); if (!response.ok) { const text = await response.text().catch(() => ""); throw new Error(`Failed to read ${url}: ${sanitizeError(response.status, text)}`); } const content = await response.text(); const truncated = content.length > 10000 ? content.slice(0, 10000) + `\n\n[... 
truncated, full length: ${content.length} chars]` : content; return { content: [{ type: "text", text: truncated }], details: { url, length: content.length, truncated: content.length > 10000, }, }; }, }); // ----------------------------------------------------------------------- // Commands // ----------------------------------------------------------------------- pi.registerCommand("search-setup", { description: "Configure search backends interactively", handler: async (_args, ctx) => { if (!ctx.hasUI) { ctx.ui.notify("/search-setup requires interactive mode", "error"); return; } const backends = Object.values(BACKEND_DEFS) .filter(d => d.setupLabel !== null) .map(d => d.setupLabel!); const backendKey: Record = Object.fromEntries( Object.entries(BACKEND_DEFS) .filter(([_, d]) => d.setupLabel !== null) .map(([k, d]) => [d.setupLabel!, k]) ); const option = await ctx.ui.select("Which backend do you want to configure?", [ ...backends, "✅ Done — save and exit", ]); if (!option || option.startsWith("✅ Done")) { ctx.ui.notify("Search setup complete.", "info"); return; } const backend = backendKey[option]; const label = option; const key = await ctx.ui.input(`Enter your ${label} API key:`, { placeholder: "sk-...", validate: (v: string) => v.trim().length > 0 ? undefined : "Key cannot be empty", }); if (!key) { ctx.ui.notify("Setup cancelled.", "info"); return; } const configDir = join(getAgentDir(), "extensions"); const configPath = join(configDir, "search.json"); mkdirSync(configDir, { recursive: true }); let existing: SearchConfig = {}; if (existsSync(configPath)) { try { existing = JSON.parse(readFileSync(configPath, "utf-8")); } catch { // ignore } } // SearXNG setup needs both instance URL and optional API key let backendConfig: BackendConfig = { enabled: true }; if (backend === "searxng") { const url = await ctx.ui.input("Enter your SearXNG instance URL (e.g. 
http://localhost:8888):", { placeholder: "http://localhost:8888", validate: (v: string) => v.trim().length > 0 ? undefined : "URL cannot be empty", }); if (!url) { ctx.ui.notify("Setup cancelled.", "info"); return; } backendConfig.instanceUrl = url.trim(); // Optionally ask for API key (some instances require auth) const optionalKey = await ctx.ui.input("Optional API key (leave empty if none):", { placeholder: "sk-... (optional)", }); if (optionalKey && optionalKey.trim()) { backendConfig.apiKey = optionalKey.trim(); } } else { backendConfig.apiKey = key?.trim() || ""; } const updated: SearchConfig = { ...existing, backends: { ...existing.backends, [backend]: backendConfig, }, }; writeFileSync(configPath, JSON.stringify(updated, null, 2) + "\n", { mode: 0o600 }); ctx.ui.notify( `${label} API key saved to ${configPath}. Run /reload to activate.`, "success", ); }, }); pi.registerCommand("search-status", { description: "Show which search backends are configured and active", handler: async (_args, ctx) => { refreshConfig(ctx.cwd); const backendLabels: Record = Object.fromEntries( Object.entries(BACKEND_DEFS).map(([k, v]) => [k, `${v.label}${k === "duckduckgo" ? " (free, no key)" : ""}`]) ); // Collect table rows first to compute aligned column widths type Row = [string, string]; const rows: Row[] = []; for (const [name, label] of Object.entries(backendLabels)) { const { configured, source } = getKeySource(name); const bc = config.backends?.[name as keyof typeof config.backends]; if (name === "duckduckgo") { rows.push([label, "✓ enabled, key: — (free)"]); } else if (name === "marginalia" && bc?.enabled) { rows.push([label, "✓ enabled, key: optional (public)"]); } else if (name === "searxng" && bc?.enabled) { const urlInfo = bc.instanceUrl ? `url: ${bc.instanceUrl}` : "no URL set"; rows.push([label, `✓ enabled, ${urlInfo}${configured ? `, key: ✓ (${source})` : ", key: —"}`]); } else if (bc?.enabled) { rows.push([label, `✓ enabled, key: ✓${source ? 
` (${source})` : ""}`]); } else { rows.push([label, `— disabled${configured ? `, key: ✓ (${source})` : ""}`]); } } // Compute column widths from headers + data const col1Header = "Backend"; const col2Header = "Status"; const w1 = rows.reduce((max, [c]) => Math.max(max, c.length), col1Header.length); const w2 = rows.reduce((max, [, s]) => Math.max(max, s.length), col2Header.length); const pad = (s: string, w: number) => s + " ".repeat(w - s.length); const tableLines = [ `| ${pad(col1Header, w1)} | ${pad(col2Header, w2)} |`, `| ${"-".repeat(w1)} | ${"-".repeat(w2)} |`, ...rows.map(([c1, c2]) => `| ${pad(c1, w1)} | ${pad(c2, w2)} |`), ]; const resolvedDefault = activeBackends[0] || "none"; const lines: string[] = [ "## Search Backend Status", `Configured default: ${config.defaultBackend || "none"}`, `Resolved default: ${resolvedDefault}`, `Active: ${activeBackends.join(", ") || "none"}`, "", ...tableLines, ]; if (activeBackends.length === 1 && activeBackends[0] === "duckduckgo") { lines.push(""); lines.push("Only DuckDuckGo is active (no API key needed)."); lines.push("Add a search backend with /search-setup to get more results."); } ctx.ui.notify(lines.join("\n"), "info"); }, }); // ----------------------------------------------------------------------- // Session start // ----------------------------------------------------------------------- pi.on("session_start", async (_event, ctx) => { backendCooldowns.clear(); refreshConfig(ctx.cwd); const status = activeBackends.join(", "); ctx.ui.setStatus("search", `search: ${status}`); }); } ================================================ FILE: package.json ================================================ { "name": "pi-search-hub", "version": "1.4.4", "description": "Unified web search + content extraction extension for pi with 12 backends (DuckDuckGo, Jina AI, Tavily, Brave, Exa, Serper, Firecrawl, Marginalia, LangSearch, WebSearchAPI, Perplexity Sonar, SearXNG). 
Auto-fallback, RRF combine mode, web_read tool, secure credential resolution.", "keywords": [ "pi-package", "pi", "pi-coding-agent", "search", "web-search", "web-read", "content-extraction", "search-hub", "duckduckgo", "jina", "tavily", "serper", "exa", "brave", "firecrawl", "langsearch", "websearchapi", "perplexity", "searxng", "rrf", "ai-agent" ], "author": "", "license": "MIT", "type": "module", "bugs": { "url": "https://github.com/ronnieops/pi-search-hub/issues" }, "repository": { "type": "git", "url": "git+https://github.com/ronnieops/pi-search-hub.git" }, "homepage": "https://github.com/ronnieops/pi-search-hub#readme", "pi": { "extensions": [ "./extensions/search-hub.ts" ], "image": "https://pi.dev/assets/packages/pi-search-hub.png" }, "peerDependencies": { "@earendil-works/pi-ai": "*", "@earendil-works/pi-coding-agent": "*" }, "peerDependenciesMeta": { "@earendil-works/pi-ai": { "optional": true }, "@earendil-works/pi-coding-agent": { "optional": true } }, "dependencies": { "typebox": "^1.1.24" }, "devDependencies": { "@types/node": "^25.6.0", "typescript": "^6.0.3", "vitest": "^4.1.7" } } ================================================ FILE: search.json.example ================================================ { "defaultBackend": "auto", "backends": { "duckduckgo": { "enabled": true }, "jina": { "enabled": true }, "marginalia": { "enabled": true }, "serper": { "enabled": true, "apiKey": "SERPER_API_KEY" }, "brave": { "enabled": true, "apiKey": "BRAVE_API_KEY" }, "tavily": { "enabled": true, "apiKey": "TAVILY_API_KEY" }, "exa": { "enabled": true, "apiKey": "EXA_API_KEY" }, "firecrawl": { "enabled": true, "apiKey": "FIRECRAWL_API_KEY" }, "langsearch": { "enabled": true, "apiKey": "LANGSEARCH_API_KEY" }, "websearchapi": { "enabled": true, "apiKey": "WEBSEARCHAPI_API_KEY" }, "perplexity": { "enabled": true, "apiKey": "PERPLEXITY_API_KEY", "model": "sonar" }, "searxng": { "enabled": true, "instanceUrl": "http://localhost:8888" } } }