Repository: foreveryh/langgraph-deep-research
Branch: main
Commit: 3757c39b6fa2
Files: 52
Total size: 454.6 KB
Directory structure:
gitextract_ckbl8hy3/
├── .gitignore
├── Dockerfile
├── LICENSE
├── Makefile
├── README.md
├── backend/
│ ├── .gitignore
│ ├── LICENSE
│ ├── Makefile
│ ├── langgraph.json
│ ├── pyproject.toml
│ ├── src/
│ │ └── agent/
│ │ ├── __init__.py
│ │ ├── app.py
│ │ ├── configuration.py
│ │ ├── content_enhancement_decision.py
│ │ ├── enhanced_graph_nodes.py
│ │ ├── graph.py
│ │ ├── prompts.py
│ │ ├── report_level_enhancement.py
│ │ ├── state.py
│ │ ├── tools_and_schemas.py
│ │ └── utils.py
│ └── test-agent.ipynb
├── docker-compose.yml
├── docs/
│ ├── document-generation-flow-ZH.md
│ └── document-generation-flow.md
└── frontend/
├── .gitignore
├── components.json
├── eslint.config.js
├── index.html
├── package.json
├── src/
│ ├── App.tsx
│ ├── components/
│ │ ├── ActivityTimeline.tsx
│ │ ├── ChatMessagesView.tsx
│ │ ├── InputForm.tsx
│ │ ├── ResearchThinkPanel.tsx
│ │ ├── WelcomeScreen.tsx
│ │ └── ui/
│ │ ├── badge.tsx
│ │ ├── button.tsx
│ │ ├── card.tsx
│ │ ├── input.tsx
│ │ ├── scroll-area.tsx
│ │ ├── select.tsx
│ │ ├── tabs.tsx
│ │ └── textarea.tsx
│ ├── global.css
│ ├── lib/
│ │ └── utils.ts
│ ├── main.tsx
│ ├── utils/
│ │ └── dataTransformer.ts
│ └── vite-env.d.ts
├── tsconfig.json
├── tsconfig.node.json
└── vite.config.ts
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
# Node / Frontend
node_modules/
frontend/dist/
frontend/.vite/
frontend/coverage/
.DS_Store
*.local
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
lerna-debug.log*
# OS generated files
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db
# IDE files
.idea/
.vscode/
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?
# Optional backend venv (if created in root)
#.venv/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
uv.lock
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
backend/.langgraph_api
modify/
================================================
FILE: Dockerfile
================================================
# Stage 1: Build React Frontend
FROM node:20-alpine AS frontend-builder
# Set working directory for frontend
WORKDIR /app/frontend
# Copy frontend package files and install dependencies
COPY frontend/package.json ./
COPY frontend/package-lock.json ./
# If you use yarn or pnpm, adjust accordingly (e.g., copy yarn.lock or pnpm-lock.yaml and use yarn install or pnpm install)
RUN npm install
# Copy the rest of the frontend source code
COPY frontend/ ./
# Build the frontend
RUN npm run build
# Stage 2: Python Backend
FROM docker.io/langchain/langgraph-api:3.11
# -- Install UV --
# First install curl, then install UV using the standalone installer
RUN apt-get update && apt-get install -y curl && \
curl -LsSf https://astral.sh/uv/install.sh | sh && \
apt-get clean && rm -rf /var/lib/apt/lists/*
ENV PATH="/root/.local/bin:$PATH"
# -- End of UV installation --
# -- Copy built frontend from builder stage --
# The app.py expects the frontend build to be at ../frontend/dist relative to its own location.
# If app.py is at /deps/backend/src/agent/app.py, then ../frontend/dist resolves to /deps/frontend/dist.
COPY --from=frontend-builder /app/frontend/dist /deps/frontend/dist
# -- End of copying built frontend --
# -- Adding local package . --
ADD backend/ /deps/backend
# -- End of local package . --
# -- Installing all local dependencies using UV --
# First, we need to ensure pip is available for UV to use
RUN uv pip install --system pip setuptools wheel
# Install dependencies with UV, respecting constraints
RUN cd /deps/backend && \
PYTHONDONTWRITEBYTECODE=1 UV_SYSTEM_PYTHON=1 uv pip install --system -c /api/constraints.txt -e .
# -- End of local dependencies install --
ENV LANGGRAPH_HTTP='{"app": "/deps/backend/src/agent/app.py:app"}'
ENV LANGSERVE_GRAPHS='{"agent": "/deps/backend/src/agent/graph.py:graph"}'
# -- Ensure user deps didn't inadvertently overwrite langgraph-api
# Create all required directories that the langgraph-api package expects
RUN mkdir -p /api/langgraph_api /api/langgraph_runtime /api/langgraph_license /api/langgraph_storage && \
touch /api/langgraph_api/__init__.py /api/langgraph_runtime/__init__.py /api/langgraph_license/__init__.py /api/langgraph_storage/__init__.py
# Use pip for this specific package as it has poetry-based build requirements
RUN PYTHONDONTWRITEBYTECODE=1 pip install --no-cache-dir --no-deps -e /api
# -- End of ensuring user deps didn't inadvertently overwrite langgraph-api --
# -- Removing pip from the final image (but keeping UV) --
RUN uv pip uninstall --system pip setuptools wheel && \
rm -rf /usr/local/lib/python*/site-packages/pip* /usr/local/lib/python*/site-packages/setuptools* /usr/local/lib/python*/site-packages/wheel* && \
find /usr/local/bin -name "pip*" -delete
# -- End of pip removal --
WORKDIR /deps/backend
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: Makefile
================================================
.PHONY: help dev-frontend dev-backend dev

# Print the list of available targets.
help:
	@echo "Available commands:"
	@echo " make dev-frontend - Starts the frontend development server (Vite)"
	@echo " make dev-backend - Starts the backend development server (Uvicorn with reload)"
	@echo " make dev - Starts both frontend and backend development servers"

# Start the frontend dev server from the frontend/ directory.
dev-frontend:
	@echo "Starting frontend development server..."
	@cd frontend && pnpm run dev

# Start the backend dev server from the backend/ directory.
dev-backend:
	@echo "Starting backend development server..."
	@cd backend && langgraph dev

# Run frontend and backend concurrently.
# $(MAKE) is used instead of a literal `make` so the sub-makes use the same
# make binary and inherit command-line flags and variables from the parent.
# The frontend is backgrounded with `&`; the backend stays in the foreground.
dev:
	@echo "Starting both frontend and backend development servers..."
	@$(MAKE) dev-frontend & $(MAKE) dev-backend
================================================
FILE: README.md
================================================
# 🚀 Enhanced Version
> Based on the original project, I have optimized the Agent workflow and frontend display effects.
## **Agent Workflow Comparison**
Optimized Agent
Original Agent
## **Frontend Display Enhancement**
Enhanced Frontend
Original Frontend
## **Technical Documentation**
For detailed technical implementation and architecture analysis, please refer to:
- 📖 [`docs/document-generation-flow.md`](docs/document-generation-flow.md) - English Technical Documentation
- 📖 [`docs/document-generation-flow-ZH.md`](docs/document-generation-flow-ZH.md) - Chinese Technical Documentation
## **📞 Contact & Support**
**Author: Peng.G**
If you have any questions about this project or Agent development, or are interested in business collaboration opportunities, feel free to reach out:
---
## **Getting Started**
The setup process remains the same. Please follow the original project's official guidance below.
---
# Gemini Fullstack LangGraph Quickstart
This project demonstrates a fullstack application using a React frontend and a LangGraph-powered backend agent. The agent is designed to perform comprehensive research on a user's query by dynamically generating search terms, querying the web using Google Search, reflecting on the results to identify knowledge gaps, and iteratively refining its search until it can provide a well-supported answer with citations. This application serves as an example of building research-augmented conversational AI using LangGraph and Google's Gemini models.

## Features
- 💬 Fullstack application with a React frontend and LangGraph backend.
- 🧠 Powered by a LangGraph agent for advanced research and conversational AI.
- 🔍 Dynamic search query generation using Google Gemini models.
- 🌐 Integrated web research via Google Search API.
- 🤔 Reflective reasoning to identify knowledge gaps and refine searches.
- 📄 Generates answers with citations from gathered sources.
- 🔄 Hot-reloading for both frontend and backend during development.
## Project Structure
The project is divided into two main directories:
- `frontend/`: Contains the React application built with Vite.
- `backend/`: Contains the LangGraph/FastAPI application, including the research agent logic.
## Getting Started: Development and Local Testing
Follow these steps to get the application running locally for development and testing.
**1. Prerequisites:**
- Node.js and npm (or yarn/pnpm)
- Python 3.11+ (the backend's `pyproject.toml` declares `requires-python = ">=3.11,<4.0"`)
- **`GEMINI_API_KEY`**: The backend agent requires a Google Gemini API key.
1. Navigate to the `backend/` directory.
2. Create a file named `.env` by copying the `backend/.env.example` file.
3. Open the `.env` file and add your Gemini API key: `GEMINI_API_KEY="YOUR_ACTUAL_API_KEY"`
**2. Install Dependencies:**
**Backend:**
```bash
cd backend
pip install .
```
**Frontend:**
```bash
cd frontend
npm install
```
**3. Run Development Servers:**
**Backend & Frontend:**
```bash
make dev
```
This will run the backend and frontend development servers. Open your browser and navigate to the frontend development server URL (e.g., `http://localhost:5173/app`).
_Alternatively, you can run the backend and frontend development servers separately. For the backend, open a terminal in the `backend/` directory and run `langgraph dev`. The backend API will be available at `http://127.0.0.1:2024`. It will also open a browser window to the LangGraph UI. For the frontend, open a terminal in the `frontend/` directory and run `npm run dev`. The frontend will be available at `http://localhost:5173`._
## How the Backend Agent Works (High-Level)
The core of the backend is a LangGraph agent defined in `backend/src/agent/graph.py`. It follows these steps:

1. **Generate Initial Queries:** Based on your input, it generates a set of initial search queries using a Gemini model.
2. **Web Research:** For each query, it uses the Gemini model with the Google Search API to find relevant web pages.
3. **Reflection & Knowledge Gap Analysis:** The agent analyzes the search results to determine if the information is sufficient or if there are knowledge gaps. It uses a Gemini model for this reflection process.
4. **Iterative Refinement:** If gaps are found or the information is insufficient, it generates follow-up queries and repeats the web research and reflection steps (up to a configured maximum number of loops).
5. **Finalize Answer:** Once the research is deemed sufficient, the agent synthesizes the gathered information into a coherent answer, including citations from the web sources, using a Gemini model.
## Deployment
In production, the backend server serves the optimized static frontend build. LangGraph requires a Redis instance and a Postgres database. Redis is used as a pub-sub broker to enable streaming real time output from background runs. Postgres is used to store assistants, threads, runs, persist thread state and long term memory, and to manage the state of the background task queue with 'exactly once' semantics. For more details on how to deploy the backend server, take a look at the [LangGraph Documentation](https://langchain-ai.github.io/langgraph/concepts/deployment_options/). Below is an example of how to build a Docker image that includes the optimized frontend build and the backend server and run it via `docker-compose`.
_Note: For the docker-compose.yml example you need a LangSmith API key, you can get one from [LangSmith](https://smith.langchain.com/settings)._
_Note: If you are not running the docker-compose.yml example, or if you are exposing the backend server to the public internet, update the `apiUrl` in the `frontend/src/App.tsx` file to point to your host. Currently the `apiUrl` is set to `http://localhost:8123` for docker-compose or `http://localhost:2024` for development._
**1. Build the Docker Image:**
Run the following command from the **project root directory**:
```bash
docker build -t gemini-fullstack-langgraph -f Dockerfile .
```
**2. Run the Production Server:**
```bash
GEMINI_API_KEY=<your_gemini_api_key> LANGSMITH_API_KEY=<your_langsmith_api_key> docker-compose up
```
Open your browser and navigate to `http://localhost:8123/app/` to see the application. The API will be available at `http://localhost:8123`.
## Technologies Used
- [React](https://reactjs.org/) (with [Vite](https://vitejs.dev/)) - For the frontend user interface.
- [Tailwind CSS](https://tailwindcss.com/) - For styling.
- [Shadcn UI](https://ui.shadcn.com/) - For components.
- [LangGraph](https://github.com/langchain-ai/langgraph) - For building the backend research agent.
- [Google Gemini](https://ai.google.dev/models/gemini) - LLM for query generation, reflection, and answer synthesis.
## License
This project is licensed under the Apache License 2.0. See the [LICENSE](LICENSE) file for details.
================================================
FILE: backend/.gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
uv.lock
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
ai_test/
================================================
FILE: backend/LICENSE
================================================
MIT License
Copyright (c) 2025 Philipp Schmid
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: backend/Makefile
================================================
# Every target in this file is a command, not a file, so all of them are
# declared phony (including the ones that previously were not listed).
.PHONY: all format format_diff lint lint_diff lint_package lint_tests test tests test_watch test_profile integration_tests docker_tests help extended_tests spell_check spell_fix

# Default target executed when no arguments are given to make.
all: help

# Define a variable for the test file path.
TEST_FILE ?= tests/unit_tests/

# `tests` is an alias of `test`; `make help` advertises both names.
test tests:
	uv run --with-editable . pytest $(TEST_FILE)

test_watch:
	uv run --with-editable . ptw --snapshot-update --now . -- -vv tests/unit_tests

test_profile:
	uv run --with-editable . pytest -vv tests/unit_tests/ --profile-svg

extended_tests:
	uv run --with-editable . pytest --only-extended $(TEST_FILE)

######################
# LINTING AND FORMATTING
######################

# Define a variable for Python and notebook files.
# Target-specific overrides below narrow or widen the set per target.
PYTHON_FILES=src/
MYPY_CACHE=.mypy_cache
lint format: PYTHON_FILES=.
lint_diff format_diff: PYTHON_FILES=$(shell git diff --name-only --diff-filter=d main | grep -E '\.py$$|\.ipynb$$')
lint_package: PYTHON_FILES=src
lint_tests: PYTHON_FILES=tests
lint_tests: MYPY_CACHE=.mypy_cache_test

# Each per-file step is skipped when PYTHON_FILES is empty (e.g. lint_diff
# with no changed files). The mypy step is wrapped in { ...; } because
# `A || B && C` parses as `(A || B) && C`, which would otherwise run mypy
# with no targets whenever PYTHON_FILES is empty.
lint lint_diff lint_package lint_tests:
	uv run ruff check .
	[ "$(PYTHON_FILES)" = "" ] || uv run ruff format $(PYTHON_FILES) --diff
	[ "$(PYTHON_FILES)" = "" ] || uv run ruff check --select I $(PYTHON_FILES)
	[ "$(PYTHON_FILES)" = "" ] || { mkdir -p $(MYPY_CACHE) && uv run mypy --strict $(PYTHON_FILES) --cache-dir $(MYPY_CACHE); }

# Guarded like the lint steps so that format_diff with no changed files is a
# no-op instead of invoking ruff without any path arguments.
format format_diff:
	[ "$(PYTHON_FILES)" = "" ] || uv run ruff format $(PYTHON_FILES)
	[ "$(PYTHON_FILES)" = "" ] || uv run ruff check --select I --fix $(PYTHON_FILES)

spell_check:
	codespell --toml pyproject.toml

spell_fix:
	codespell --toml pyproject.toml -w

######################
# HELP
######################

help:
	@echo '----'
	@echo 'format - run code formatters'
	@echo 'lint - run linters'
	@echo 'test - run unit tests'
	@echo 'tests - run unit tests'
	@echo 'test TEST_FILE= - run all tests in file'
	@echo 'test_watch - run unit tests in watch mode'
================================================
FILE: backend/langgraph.json
================================================
{
"dependencies": ["."],
"graphs": {
"agent": "./src/agent/graph.py:graph"
},
"http": {
"app": "./src/agent/app.py:app"
},
"env": ".env"
}
================================================
FILE: backend/pyproject.toml
================================================
[project]
name = "agent"
version = "0.0.1"
description = "Backend for the LangGraph agent"
authors = [
{ name = "Philipp Schmid", email = "schmidphilipp1995@gmail.com" },
]
readme = "README.md"
license = { text = "MIT" }
requires-python = ">=3.11,<4.0"
dependencies = [
"langgraph>=0.2.6",
"langchain>=0.3.19",
"langchain-google-genai",
"python-dotenv>=1.0.1",
"langgraph-sdk>=0.1.57",
"langgraph-cli",
"langgraph-api",
"fastapi",
"google-genai",
"tiktoken>=0.8.0",
"firecrawl-py>=2.7.0",
]
[project.optional-dependencies]
dev = ["mypy>=1.11.1", "ruff>=0.6.1"]
[build-system]
requires = ["setuptools>=73.0.0", "wheel"]
build-backend = "setuptools.build_meta"
[tool.ruff]
lint.select = [
"E", # pycodestyle
"F", # pyflakes
"I", # isort
"D", # pydocstyle
"D401", # First line should be in imperative mood
"T201",
"UP",
]
lint.ignore = [
"UP006",
"UP007",
# We actually do want to import from typing_extensions
"UP035",
# Relax the convention by _not_ requiring documentation for every function parameter.
"D417",
"E501",
]
[tool.ruff.lint.per-file-ignores]
"tests/*" = ["D", "UP"]
[tool.ruff.lint.pydocstyle]
convention = "google"
[dependency-groups]
dev = [
"langgraph-cli[inmem]>=0.1.71",
"pytest>=8.3.5",
]
================================================
FILE: backend/src/agent/__init__.py
================================================
from agent.graph import graph
__all__ = ["graph"]
================================================
FILE: backend/src/agent/app.py
================================================
# mypy: disable-error-code="no-untyped-def,misc"
import pathlib
from fastapi import FastAPI, Request, Response
from fastapi.staticfiles import StaticFiles
import fastapi.exceptions
# Define the FastAPI app
app = FastAPI()
def create_frontend_router(build_dir="../frontend/dist"):
    """Create the ASGI object that serves the built React frontend.

    Args:
        build_dir: Path to the React build directory, joined onto the directory
            three levels above this file.

    Returns:
        A FastAPI sub-application serving the build. When the build directory or
        its index.html is missing, a single Starlette ``Route`` is returned
        instead, which answers every path with a 503 plain-text message.
    """
    # Resolve once so containment checks below compare canonical paths.
    build_path = (pathlib.Path(__file__).parent.parent.parent / build_dir).resolve()
    static_files_path = build_path / "assets"  # Vite uses 'assets' subdir
    index_file = build_path / "index.html"

    if not build_path.is_dir() or not index_file.is_file():
        print(
            f"WARN: Frontend build directory not found or incomplete at {build_path}. Serving frontend will likely fail."
        )
        # Return a dummy router if build isn't ready
        from starlette.routing import Route

        async def dummy_frontend(request):
            return Response(
                "Frontend not built. Run 'npm run build' in the frontend directory.",
                media_type="text/plain",
                status_code=503,
            )

        return Route("/{path:path}", endpoint=dummy_frontend)

    react = FastAPI(openapi_url="")
    react.mount(
        "/assets", StaticFiles(directory=static_files_path), name="static_assets"
    )

    @react.get("/{path:path}")
    async def handle_catch_all(request: Request, path: str):
        # `path` comes from the request URL: only serve it when it resolves to a
        # regular file inside the build directory; otherwise fall back to the
        # SPA entry point so client-side routing can handle it.
        fp = (build_path / path).resolve()
        if not fp.is_relative_to(build_path) or not fp.is_file():
            fp = index_file
        return fastapi.responses.FileResponse(fp)

    return react
# Mount the frontend under /app to not conflict with the LangGraph API routes
app.mount(
"/app",
create_frontend_router(),
name="frontend",
)
================================================
FILE: backend/src/agent/configuration.py
================================================
import os
from pydantic import BaseModel, Field
from typing import Any, Optional
from langchain_core.runnables import RunnableConfig
class Configuration(BaseModel):
    """Tunable settings for the agent: model names and research limits."""

    query_generator_model: str = Field(
        default="gemini-2.5-flash-preview-04-17",
        metadata={
            "description": "The name of the language model to use for the agent's query generation."
        },
    )
    reflection_model: str = Field(
        default="gemini-2.5-flash-preview-04-17",
        metadata={
            "description": "The name of the language model to use for the agent's reflection."
        },
    )
    answer_model: str = Field(
        default="gemini-2.5-flash-preview-04-17",
        metadata={
            "description": "The name of the language model to use for the agent's answer."
        },
    )
    number_of_initial_queries: int = Field(
        default=6,
        metadata={"description": "The number of initial search queries to generate."},
    )
    max_research_loops: int = Field(
        default=8,
        metadata={"description": "The maximum number of research loops to perform."},
    )

    @classmethod
    def from_runnable_config(
        cls, config: Optional[RunnableConfig] = None
    ) -> "Configuration":
        """Build a Configuration from environment variables and a RunnableConfig.

        For every declared field, the environment variable named after the
        upper-cased field wins; otherwise the entry in ``config["configurable"]``
        is used. Fields resolved to ``None`` fall back to their defaults.
        """
        configurable = {}
        if config and "configurable" in config:
            configurable = config["configurable"]

        values: dict[str, Any] = {}
        for field_name in cls.model_fields.keys():
            resolved = os.environ.get(field_name.upper(), configurable.get(field_name))
            if resolved is not None:
                values[field_name] = resolved
        return cls(**values)
================================================
FILE: backend/src/agent/content_enhancement_decision.py
================================================
"""
智能内容增强决策模块 - 决定何时使用Firecrawl进行深度内容抓取
"""
import os
from typing import Dict, List, Any, Optional
from dataclasses import dataclass
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.runnables import RunnableConfig
from firecrawl import FirecrawlApp
@dataclass
class EnhancementDecision:
    """Result of a content-enhancement decision."""

    # Whether deep content enhancement was judged necessary.
    needs_enhancement: bool
    # Selected sources; _parse_llm_decision fills each dict with
    # "title", "url", "priority_score" and "reasoning".
    priority_urls: List[Dict[str, Any]]
    # Text explaining the decision (taken from the LLM reply).
    reasoning: str
    confidence_score: float  # 0-1
    enhancement_type: str  # "none", "selective", "comprehensive"
class ContentEnhancementDecisionMaker:
    """Decide, with an LLM, whether sources should be deep-scraped with Firecrawl.

    The Firecrawl client is only created when FIRECRAWL_API_KEY is set;
    otherwise ``firecrawl_app`` stays ``None`` and scraping is skipped.
    """

    def __init__(self):
        self.firecrawl_app = None
        if os.getenv("FIRECRAWL_API_KEY"):
            self.firecrawl_app = FirecrawlApp(api_key=os.getenv("FIRECRAWL_API_KEY"))

    def analyze_enhancement_need(
        self,
        research_topic: str,
        current_findings: List[str],
        grounding_sources: List[Dict[str, Any]],
        config: RunnableConfig
    ) -> EnhancementDecision:
        """Ask the LLM whether the current findings need deep content enhancement.

        Args:
            research_topic: Topic the findings are about.
            current_findings: Research result texts gathered so far.
            grounding_sources: Candidate sources (dicts with "title" and "url").
            config: Runnable config used to resolve the reflection model name.

        Returns:
            The parsed EnhancementDecision.
        """
        analysis_prompt = self._build_analysis_prompt(
            research_topic, current_findings, grounding_sources
        )

        # Imported here rather than at module level, as in the original code.
        from agent.configuration import Configuration

        configurable = Configuration.from_runnable_config(config)
        llm = ChatGoogleGenerativeAI(
            model=configurable.reflection_model,  # same model as the reflection step
            temperature=0.3,  # low temperature for consistent answers
            max_retries=2,
            api_key=os.getenv("GEMINI_API_KEY"),
        )
        response = llm.invoke(analysis_prompt)
        decision_text = response.content if hasattr(response, 'content') else str(response)

        return self._parse_llm_decision(decision_text, grounding_sources)

    def _build_analysis_prompt(
        self,
        research_topic: str,
        current_findings: List[str],
        grounding_sources: List[Dict[str, Any]]
    ) -> str:
        """Build the assessment prompt from the last 3 findings and the first 5 sources."""
        findings_summary = "\n---\n".join(current_findings[-3:])
        sources_list = "\n".join([
            f"- {source.get('title', 'N/A')}: {source.get('url', 'N/A')}"
            for source in grounding_sources[:5]
        ])
        return f"""你是一个研究质量评估专家。请分析当前的研究结果质量,并决定是否需要深度内容增强。
研究主题: {research_topic}
当前研究发现:
{findings_summary}
可用的信息源:
{sources_list}
请根据以下标准进行评估:
1. **内容深度不足的信号**:
- 缺乏具体数据、统计信息、案例研究
- 描述过于泛泛,缺乏技术细节
- 没有提及重要的公司、项目或实施案例
- 信息源质量不高(非权威网站)
2. **需要深度抓取的情况**:
- 研究主题需要详细的技术说明
- 当前结果缺乏关键数据支撑
- 存在权威信息源但内容被截断
- 需要获取完整的报告或研究内容
3. **评估当前信息源的价值**:
- 官方网站/文档: 高价值
- 学术论文/研究报告: 高价值
- 维基百科/百科类: 中等价值
- 新闻报道: 根据详细程度判断
- 博客/论坛: 低价值
请按以下格式回答:
**决策**: [ENHANCE/NO_ENHANCE]
**置信度**: [0.1-1.0]
**增强类型**: [selective/comprehensive/none]
**推荐URL数量**: [0-3]
**推理过程**:
[详细说明你的判断理由,包括当前内容的不足之处和预期的改进效果]
**优先URLs** (如果需要增强):
[从信息源中选择最值得深度抓取的URL,按优先级排序]
"""

    def _parse_llm_decision(
        self,
        decision_text: str,
        grounding_sources: List[Dict[str, Any]]
    ) -> EnhancementDecision:
        """Parse the LLM reply into an EnhancementDecision.

        The labelled fields requested by the prompt are read first; the older
        whole-text keyword heuristics are only used when a field is missing.
        The confidence is clamped to the range 0-1, and ``reasoning`` keeps the
        reply in its original casing.
        """
        import re

        # Keep the untouched reply for `reasoning`; match on a lowercase copy.
        original_text = decision_text if isinstance(decision_text, str) else str(decision_text)
        lowered = original_text.lower()

        # Decision: prefer the labelled field so that a stray "no_enhance" or
        # "enhancement" in the reasoning section cannot flip the verdict.
        decision_match = re.search(r'决策[^a-z\n]*(no[_\s-]?enhance|enhance)', lowered)
        if decision_match:
            needs_enhancement = decision_match.group(1) == "enhance"
        else:
            needs_enhancement = "enhance" in lowered and "no_enhance" not in lowered

        # Confidence: accept any number of decimals (0.85), integers (1) and
        # percentages (85%), then clamp into the documented range.
        confidence_score = 0.5  # default when the field is absent
        confidence_match = re.search(r'置信度[^0-9\n]*([0-9]+(?:\.[0-9]+)?)\s*(%)?', lowered)
        if confidence_match:
            value = float(confidence_match.group(1))
            if confidence_match.group(2):
                value /= 100
            confidence_score = min(max(value, 0.0), 1.0)

        # Enhancement type: labelled field first, keyword scan as fallback.
        type_match = re.search(r'增强类型[^a-z\n]*(selective|comprehensive|none)', lowered)
        if type_match:
            enhancement_type = type_match.group(1)
        elif "selective" in lowered:
            enhancement_type = "selective"
        elif "comprehensive" in lowered:
            enhancement_type = "comprehensive"
        else:
            enhancement_type = "none"
        # Keep the type consistent with the verdict.
        if not needs_enhancement:
            enhancement_type = "none"
        elif enhancement_type == "none":
            enhancement_type = "selective"  # default to selective enhancement

        # Pick priority URLs with a simple heuristic score (not chosen by the LLM).
        priority_urls = []
        if needs_enhancement and grounding_sources:
            scored_sources = [
                (source, self._calculate_url_priority(source))
                for source in grounding_sources
            ]
            scored_sources.sort(key=lambda pair: pair[1], reverse=True)
            max_urls = 3 if enhancement_type == "comprehensive" else 2
            priority_urls = [
                {
                    "title": source.get("title", ""),
                    "url": source.get("url", ""),
                    "priority_score": score,
                    "reasoning": f"评分: {score:.2f}"
                }
                for source, score in scored_sources[:max_urls]
                if score > 0.3  # only keep sources that score above the baseline
            ]

        return EnhancementDecision(
            needs_enhancement=needs_enhancement,
            priority_urls=priority_urls,
            reasoning=original_text,
            confidence_score=confidence_score,
            enhancement_type=enhancement_type
        )

    def _calculate_url_priority(self, source: Dict[str, Any]) -> float:
        """Score a source between 0.1 and 1.0 from substrings of its url and title."""
        score = 0.0
        # `or ""` also covers sources whose url/title is explicitly None.
        url = (source.get("url") or "").lower()
        title = (source.get("title") or "").lower()

        # Government / education / organisation domains
        if any(domain in url for domain in [".gov", ".edu", ".org"]):
            score += 0.4
        # Well-known reference platforms
        if any(platform in url for platform in ["wikipedia", "arxiv", "ieee", "acm"]):
            score += 0.3
        # Title keywords hinting at technical content
        if any(keyword in title for keyword in ["report", "study", "research", "analysis", "technical"]):
            score += 0.2
        # Selected company sites
        if any(company in url for company in ["google", "microsoft", "amazon", "tesla", "nvidia"]):
            score += 0.2
        # Baseline every source receives
        score += 0.1
        return min(score, 1.0)

    async def enhance_content_with_firecrawl(
        self,
        priority_urls: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """Scrape each priority URL with Firecrawl and return the successful results.

        Returns an empty list when no Firecrawl client is configured. Failures
        for individual URLs are printed and skipped.
        """
        if not self.firecrawl_app:
            return []

        enhanced_results = []
        for url_info in priority_urls:
            url = url_info.get("url")
            if not url:
                continue
            try:
                print(f"🔥 Firecrawl增强: {url_info.get('title', 'Unknown')}")
                result = self.firecrawl_app.scrape_url(url)
                if result and result.success:
                    markdown_content = result.markdown or ''
                    enhanced_results.append({
                        "url": url,
                        "title": url_info.get("title", ""),
                        "original_priority": url_info.get("priority_score", 0),
                        "enhanced_content": markdown_content,
                        "content_length": len(markdown_content),
                        "enhancement_quality": self._assess_enhancement_quality(markdown_content),
                        "source_type": "firecrawl_enhanced"
                    })
                    print(f" ✅ 增强成功: {len(markdown_content)} 字符")
                else:
                    print(f" ❌ 增强失败: {result.error if hasattr(result, 'error') else '未知错误'}")
            except Exception as e:
                # Best effort: one failing URL must not abort the others.
                print(f" ❌ 增强异常: {str(e)}")
                continue
        return enhanced_results

    def _assess_enhancement_quality(self, content: str) -> str:
        """Grade scraped content as "excellent", "good", "fair" or "poor".

        The grade depends on length, whether any digit is present and whether a
        '#' (markdown heading marker) is present.
        """
        if not content:
            return "poor"
        length = len(content)
        has_data = any(char.isdigit() for char in content)
        has_structure = any(marker in content for marker in ['#', '##', '###'])
        if length > 5000 and has_data and has_structure:
            return "excellent"
        elif length > 1000 and (has_data or has_structure):
            return "good"
        elif length > 300:
            return "fair"
        else:
            return "poor"
# 延迟初始化函数,避免循环导入
def get_content_enhancement_decision_maker():
    """Return the shared ContentEnhancementDecisionMaker, building it on the first call.

    The instance is cached as an attribute on this function object.
    """
    holder = get_content_enhancement_decision_maker
    try:
        return holder._instance
    except AttributeError:
        holder._instance = ContentEnhancementDecisionMaker()
        return holder._instance
# 为了向后兼容,保留原有的全局变量名
content_enhancement_decision_maker = None # 将在首次使用时初始化
================================================
FILE: backend/src/agent/enhanced_graph_nodes.py
================================================
"""
增强的Graph节点 - 集成智能Firecrawl内容增强功能
"""
import os
import json
from typing import List, Dict, Any
from datetime import datetime
from langchain_core.runnables import RunnableConfig
from langchain_core.messages import AIMessage
from agent.state import OverallState, ReflectionState
from agent.content_enhancement_decision import (
get_content_enhancement_decision_maker,
EnhancementDecision
)
from agent.utils import get_research_topic
def content_enhancement_analysis(state: OverallState, config: RunnableConfig) -> dict:
    """
    Graph node that decides whether to deep-scrape sources with Firecrawl and, if so, does it.

    Steps performed:
    1. Determine the research topic (current plan task, else user query / messages).
    2. Ask the decision maker whether content enhancement is needed.
    3. Scrape the decision's priority URLs with Firecrawl, one after another.
    4. Return the scraped content formatted as additional research results.

    Returns:
        A state-update dict. It always carries "enhancement_status", which is one of
        "skipped", "skipped_no_api", "completed", "failed" or "error". On every
        path except "error" it also carries "enhancement_decision". On "completed"
        it additionally carries "enhanced_content_results", "web_research_result"
        and "enhanced_sources_count"; on "error" it carries "enhancement_error".
    """
    try:
        # Gather the current research context
        plan = state.get("plan", [])
        current_pointer = state.get("current_task_pointer", 0)
        # Determine the research topic
        if plan and current_pointer < len(plan):
            research_topic = plan[current_pointer]["description"]
        else:
            research_topic = state.get("user_query") or get_research_topic(state["messages"])
        # Findings collected so far
        current_findings = state.get("web_research_result", [])
        # Build the grounding sources from the most recently gathered sources
        grounding_sources = []
        sources_gathered = state.get("sources_gathered", [])
        for source in sources_gathered[-10:]:  # the 10 most recent sources
            if isinstance(source, dict):
                grounding_sources.append({
                    "title": source.get("title", ""),
                    "url": source.get("url", ""),
                    "snippet": source.get("snippet", "")
                })
        print(f"🤔 分析内容增强需求...")
        print(f" 研究主题: {research_topic}")
        print(f" 当前发现数量: {len(current_findings)}")
        print(f" 可用信息源: {len(grounding_sources)}")
        # Let the decision maker analyse the need for enhancement
        decision = get_content_enhancement_decision_maker().analyze_enhancement_need(
            research_topic=research_topic,
            current_findings=current_findings,
            grounding_sources=grounding_sources,
            config=config
        )
        print(f"📊 增强决策结果:")
        print(f" 需要增强: {decision.needs_enhancement}")
        print(f" 置信度: {decision.confidence_score:.2f}")
        print(f" 增强类型: {decision.enhancement_type}")
        print(f" 优先URL数量: {len(decision.priority_urls)}")
        # Record the decision in the state update
        state_update = {
            "enhancement_decision": {
                "needs_enhancement": decision.needs_enhancement,
                "confidence_score": decision.confidence_score,
                "enhancement_type": decision.enhancement_type,
                "reasoning": decision.reasoning,
                "priority_urls": decision.priority_urls
            }
        }
        # Nothing to do when enhancement is not needed
        if not decision.needs_enhancement:
            print("✅ 当前内容质量充足,无需增强")
            state_update["enhancement_status"] = "skipped"
            return state_update
        # Without a Firecrawl client (no API key) enhancement is skipped
        if not get_content_enhancement_decision_maker().firecrawl_app:
            print("⚠️ 缺少FIRECRAWL_API_KEY,跳过内容增强")
            state_update["enhancement_status"] = "skipped_no_api"
            return state_update
        # Run the content enhancement
        print(f"🔥 执行Firecrawl内容增强...")
        enhanced_results = []
        # Synchronous calls (simplified for now; could be made async later)
        for url_info in decision.priority_urls:
            url = url_info.get("url")
            if not url:
                continue
            try:
                print(f" 正在抓取: {url_info.get('title', 'Unknown')}")
                result = get_content_enhancement_decision_maker().firecrawl_app.scrape_url(url)
                if result and result.success:
                    markdown_content = result.markdown or ''
                    enhanced_results.append({
                        "url": url,
                        "title": url_info.get("title", ""),
                        "original_priority": url_info.get("priority_score", 0),
                        "enhanced_content": markdown_content,
                        "content_length": len(markdown_content),
                        "source_type": "firecrawl_enhanced",
                        "timestamp": datetime.now().isoformat()
                    })
                    print(f" ✅ 成功: {len(markdown_content)} 字符")
                else:
                    print(f" ❌ 失败: {result.error if hasattr(result, 'error') else '未知错误'}")
            except Exception as e:
                # A failure on one URL is logged and the loop moves on
                print(f" ❌ 异常: {str(e)}")
                continue
        if enhanced_results:
            # Add the enhanced content to the research results
            enhanced_contents = []
            for result in enhanced_results:
                # Format the enhanced content; the body is cut at 3000 characters
                formatted_content = f"""
## 深度内容增强 - {result['title']}
来源: {result['url']}
内容长度: {result['content_length']} 字符
{result['enhanced_content'][:3000]}{'...' if len(result['enhanced_content']) > 3000 else ''}
---
"""
                enhanced_contents.append(formatted_content)
            state_update.update({
                "enhanced_content_results": enhanced_results,
                "web_research_result": enhanced_contents,  # added to the research results
                "enhancement_status": "completed",
                "enhanced_sources_count": len(enhanced_results)
            })
            print(f"✅ 内容增强完成: {len(enhanced_results)} 个源")
        else:
            print("❌ 内容增强失败,没有成功抓取任何内容")
            state_update["enhancement_status"] = "failed"
        return state_update
    except Exception as e:
        # Any unexpected error is reported through the state instead of raising
        error_message = f"内容增强分析节点异常: {str(e)}"
        print(f"❌ {error_message}")
        return {
            "enhancement_status": "error",
            "enhancement_error": error_message
        }
def should_enhance_content(state: OverallState) -> str:
    """
    Conditional-edge function: choose whether to enter the content-enhancement step.

    Enhancement is skipped (in this order) when FIRECRAWL_API_KEY is not set,
    when no research loop has finished yet, when enhancement already ended with
    status "completed" or "skipped", or when there are no research results.

    Returns:
        "analyze_enhancement_need" or "continue_without_enhancement".
    """
    skip_reason = None
    if not os.getenv("FIRECRAWL_API_KEY"):
        # Firecrawl is unavailable without an API key
        skip_reason = "未配置FIRECRAWL_API_KEY"
    else:
        loops_done = state.get("research_loop_count", 0)
        if loops_done < 1:
            # Require at least one completed research loop first
            skip_reason = f"研究循环次数不足 ({loops_done})"
        elif state.get("enhancement_status") in ("completed", "skipped"):
            # Avoid enhancing twice
            skip_reason = "已经完成增强"
        elif len(state.get("web_research_result", [])) < 1:
            # There must be some base content to enhance
            skip_reason = "缺少基础研究内容"

    if skip_reason is not None:
        print(f"⚠️ 跳过内容增强: {skip_reason}")
        return "continue_without_enhancement"

    print("✅ 满足增强条件,进入内容增强分析")
    return "analyze_enhancement_need"
def enhanced_reflection(state: OverallState, config: RunnableConfig) -> ReflectionState:
    """
    Reflection node variant that also takes content-enhancement results into account.

    Runs the regular ``reflection`` node first. When enhancement completed and
    reflection judged the research insufficient, enough enhanced sources
    (boost = min(count * 0.3, 0.8) >= 0.6) flip the verdict to sufficient.

    Returns:
        The dict produced by ``reflection``, possibly with its sufficiency and
        knowledge-gap entries overwritten.
    """
    # Run the original reflection logic first (imported lazily, as before)
    from agent.graph import reflection

    reflection_result = reflection(state, config)

    # reflection() publishes its verdict under "reflection_*" keys; fall back to
    # the bare names only if a result without those keys is ever returned.
    if "reflection_is_sufficient" in reflection_result:
        sufficient_key, gap_key = "reflection_is_sufficient", "reflection_knowledge_gap"
    else:
        sufficient_key, gap_key = "is_sufficient", "knowledge_gap"

    enhancement_status = state.get("enhancement_status")
    enhanced_sources_count = state.get("enhanced_sources_count", 0)

    if enhancement_status == "completed" and enhanced_sources_count > 0:
        print(f"📈 内容增强完成,调整反思判断")
        print(f" 增强了 {enhanced_sources_count} 个信息源")
        # Successful enhancement leans towards "sufficient", but the LLM's own
        # positive verdict is never overridden.
        if not reflection_result.get(sufficient_key, False):
            enhancement_boost = min(enhanced_sources_count * 0.3, 0.8)
            print(f" 由于内容增强,提升充足性评估 (+{enhancement_boost:.1f})")
            # A large enough boost turns "insufficient" into "sufficient"
            if enhancement_boost >= 0.6:
                print(" ✅ 基于内容增强结果,判定信息已充足")
                reflection_result[sufficient_key] = True
                reflection_result[gap_key] = "内容已通过深度抓取得到充分补充"
    elif enhancement_status == "skipped":
        print("📝 内容增强被跳过,使用原始反思结果")
    elif enhancement_status == "failed":
        print("⚠️ 内容增强失败,可能需要更多研究循环")

    return reflection_result
# 辅助函数:格式化增强决策信息用于日志
def format_enhancement_decision_log(decision: EnhancementDecision) -> str:
    """Render an enhancement decision as a multi-line text block for logging.

    The block lists the verdict, confidence, enhancement type and number of
    priority URLs, then one line per priority URL (if any), and finally the
    first 200 characters of the reasoning followed by "...".
    """
    verdict = '需要增强' if decision.needs_enhancement else '无需增强'
    urls = decision.priority_urls

    parts = [
        "📊 内容增强决策报告:",
        " 决策: {}".format(verdict),
        " 置信度: {:.2f}".format(decision.confidence_score),
        " 增强类型: {}".format(decision.enhancement_type),
        " 优先URL数量: {}".format(len(urls)),
    ]
    if urls:
        parts.append(" 优先URLs:")
        parts.extend(
            " {}. {} (评分: {:.2f})".format(
                rank, entry.get('title', 'N/A'), entry.get('priority_score', 0)
            )
            for rank, entry in enumerate(urls, 1)
        )
    parts.append(" 推理: {}...".format(decision.reasoning[:200]))
    return "\n".join(parts)
================================================
FILE: backend/src/agent/graph.py
================================================
import os
import json
from typing import List
from datetime import datetime
from agent.tools_and_schemas import SearchQueryList, Reflection, ResearchPlan, LedgerEntry
from dotenv import load_dotenv
from langchain_core.messages import AIMessage
from langgraph.types import Send
from langgraph.graph import StateGraph
from langgraph.graph import START, END
from langchain_core.runnables import RunnableConfig
from google.genai import Client
import tiktoken # 需确保环境已安装 tiktoken
from agent.state import (
OverallState,
QueryGenerationState,
ReflectionState,
WebSearchState,
)
from agent.configuration import Configuration
from agent.prompts import (
get_current_date,
query_writer_instructions,
web_searcher_instructions,
reflection_instructions,
answer_instructions,
planning_instructions,
integrated_report_instructions,
)
from langchain_google_genai import ChatGoogleGenerativeAI
from agent.utils import (
get_citations,
get_research_topic,
insert_citation_markers,
resolve_urls,
)
# Import intelligent content enhancement modules
from agent.enhanced_graph_nodes import (
content_enhancement_analysis,
should_enhance_content
)
load_dotenv()
if os.getenv("GEMINI_API_KEY") is None:
raise ValueError("GEMINI_API_KEY is not set")
# Used for Google Search API
genai_client = Client(api_key=os.getenv("GEMINI_API_KEY"))
# Nodes
def generate_query(state: OverallState, config: RunnableConfig) -> QueryGenerationState:
    """LangGraph node that generates search queries for the current research task.

    The topic is the description of the plan task selected by
    ``current_task_pointer``; without a usable plan it falls back to
    ``user_query`` and then to the topic derived from the messages.

    Returns:
        A dict with "query_list", "plan" and "current_task_pointer".
    """
    settings = Configuration.from_runnable_config(config)

    # Use the configured default when no custom query count was supplied.
    if state.get("initial_search_query_count") is None:
        state["initial_search_query_count"] = settings.number_of_initial_queries

    # Model that returns its answer as a SearchQueryList
    query_llm = ChatGoogleGenerativeAI(
        model=settings.query_generator_model,
        temperature=1.0,
        max_retries=2,
        api_key=os.getenv("GEMINI_API_KEY"),
    ).with_structured_output(SearchQueryList)

    # Prefer the task the plan pointer currently selects.
    tasks = state.get("plan")
    task_index = state.get("current_task_pointer")
    if tasks and task_index is not None and task_index < len(tasks):
        topic = tasks[task_index]["description"]
    else:
        # Fallback to user_query or messages
        topic = state.get("user_query") or get_research_topic(state["messages"])

    prompt = query_writer_instructions.format(
        current_date=get_current_date(),
        research_topic=topic,
        number_queries=state["initial_search_query_count"],
    )
    generated = query_llm.invoke(prompt)

    update = {"query_list": generated.query}
    update["plan"] = state.get("plan", [])
    update["current_task_pointer"] = state.get("current_task_pointer", 0)
    return update
def continue_to_web_research(state: QueryGenerationState):
    """Fan the generated search queries out to the web research node.

    One ``Send`` to "web_research" is produced per query, carrying the query
    text, its index as "id", and the id of the current plan task ("unknown"
    when the plan has no task at the current pointer).
    """
    tasks = state.get("plan", [])
    task_index = state.get("current_task_pointer", 0)

    # Resolve the id of the task these queries belong to
    if tasks and task_index < len(tasks):
        task_id = tasks[task_index]["id"]
    else:
        task_id = "unknown"

    dispatches = []
    for position, query_text in enumerate(state["query_list"]):
        payload = {
            "search_query": query_text,
            "id": int(position),
            "current_task_id": task_id,
        }
        dispatches.append(Send("web_research", payload))
    return dispatches
def _build_web_research_update(state, content, sources):
    """Assemble the state update shared by every exit path of web_research."""
    current_task_id = state.get("current_task_id", "unknown")
    timestamp = datetime.now().isoformat()
    # Per-query finding, tagged with the task and query it belongs to
    detailed_finding = {
        "task_id": current_task_id,
        "query_id": state["id"],
        "content": content,
        "source": sources[0] if sources else None,
        "timestamp": timestamp
    }
    # Same content with all of its sources, grouped by task
    task_specific_result = {
        "task_id": current_task_id,
        "content": content,
        "sources": sources,
        "timestamp": timestamp
    }
    return {
        "sources_gathered": sources,
        "executed_search_queries": [state["search_query"]],
        "web_research_result": [content],
        "current_task_detailed_findings": [detailed_finding],
        "task_specific_results": [task_specific_result]
    }


def web_research(state: WebSearchState, config: RunnableConfig) -> OverallState:
    """LangGraph node that performs web research using the native Google Search API tool.

    Sends the search query to the configured query-generator model with the
    Google Search tool enabled and annotates the answer with citation markers.

    Args:
        state: Current graph state containing the search query, its id and
            (optionally) the id of the plan task it belongs to
        config: Configuration for the runnable, including the model name

    Returns:
        Dictionary with state update: sources_gathered, executed_search_queries,
        web_research_result, current_task_detailed_findings and
        task_specific_results. When the response carries no grounding metadata,
        or any error occurs, the content is a message describing the problem
        and the sources are empty.
    """
    try:
        # Configure
        configurable = Configuration.from_runnable_config(config)
        formatted_prompt = web_searcher_instructions.format(
            current_date=get_current_date(),
            research_topic=state["search_query"],
        )
        # Uses the google genai client as the langchain client doesn't return grounding metadata
        response = genai_client.models.generate_content(
            model=configurable.query_generator_model,
            contents=formatted_prompt,
            config={
                "tools": [{"google_search": {}}],
                "temperature": 0,
            },
        )
        # Error handling for empty response
        if not response.candidates or not response.candidates[0].grounding_metadata:
            return _build_web_research_update(
                state, f"No results found for query: {state['search_query']}", []
            )
        # resolve the urls to short urls for saving tokens and time
        resolved_urls = resolve_urls(
            response.candidates[0].grounding_metadata.grounding_chunks, state["id"]
        )
        # Gets the citations and adds them to the generated text
        citations = get_citations(response, resolved_urls)
        modified_text = insert_citation_markers(response.text, citations)
        sources_gathered = [item for citation in citations for item in citation["segments"]]
        return _build_web_research_update(state, modified_text, sources_gathered)
    except Exception as e:
        # Error handling for API or processing errors: report the failure as the
        # research result so the graph can keep going
        return _build_web_research_update(
            state, f"Error during web research: {str(e)}", []
        )
def reflection(state: OverallState, config: RunnableConfig) -> OverallState:
    """LangGraph node that identifies knowledge gaps and generates potential follow-up queries.

    Checks whether the search results gathered so far are sufficient for the
    research topic and, if not, proposes follow-up queries. The model is asked
    for structured output first; if that fails a plain-text JSON fallback is
    tried, then a rule based on the number of results. Any other error marks
    the research as sufficient so the flow is not interrupted.

    Args:
        state: Current graph state (messages, web_research_result, counters).
        config: Configuration for the runnable, providing the reflection model.

    Returns:
        Dictionary with research_loop_count, reflection_is_sufficient,
        reflection_knowledge_gap, reflection_follow_up_queries,
        number_of_ran_queries, plan and current_task_pointer.
    """
    # Increment the research loop counter before anything can fail, so the
    # return statement below can always report it.
    state["research_loop_count"] = (state.get("research_loop_count") or 0) + 1

    try:
        configurable = Configuration.from_runnable_config(config)
        # Configuration declares `reflection_model`; it has no `reasoning_model`
        # field, and reading one sent every call to the emergency fallback.
        reasoning_model = configurable.reflection_model
        current_date = get_current_date()
        research_topic = get_research_topic(state["messages"])
        # Safely retrieve web research results and truncate overly long content
        web_research_results = state.get("web_research_result", [])
        # Content truncation: limit total characters to avoid API limits
        MAX_CHARS = 50000  # Approximately 12500 tokens
        truncated_results = []
        total_chars = 0
        for result in web_research_results:
            result_str = str(result)
            if total_chars + len(result_str) <= MAX_CHARS:
                truncated_results.append(result_str)
                total_chars += len(result_str)
            else:
                # Partially truncate the last result
                remaining_chars = MAX_CHARS - total_chars
                if remaining_chars > 500:  # Keep at least 500 characters
                    truncated_results.append(result_str[:remaining_chars] + "...[truncated]")
                break
        print(f"🔍 Reflection analysis: {len(web_research_results)} results, {len(truncated_results)} after truncation, {total_chars} characters")
        formatted_prompt = reflection_instructions.format(
            current_date=current_date,
            research_topic=research_topic,
            summaries="\n\n---\n\n".join(truncated_results),
        )
        # Check prompt length
        prompt_length = len(formatted_prompt)
        print(f"📏 Reflection prompt length: {prompt_length} characters")
        if prompt_length > 100000:  # If still too long, further truncate
            print("⚠️ Prompt too long, further truncating summaries section")
            truncated_summaries = "\n\n---\n\n".join(truncated_results[:3])  # Keep only first 3 results
            formatted_prompt = reflection_instructions.format(
                current_date=current_date,
                research_topic=research_topic,
                summaries=truncated_summaries,
            )
        # Initialize LLM
        llm = ChatGoogleGenerativeAI(
            model=reasoning_model,
            temperature=1.0,
            max_retries=3,  # Increase retry count
            api_key=os.getenv("GEMINI_API_KEY"),
        )
        # Try structured output
        try:
            print("🤖 Calling Gemini API for reflection analysis...")
            result = llm.with_structured_output(Reflection).invoke(formatted_prompt)
            print("✅ Reflection analysis completed successfully")
        except Exception as api_error:
            print(f"❌ Structured output failed: {str(api_error)}")
            print("🔄 Trying fallback approach...")
            # Fallback: use simple text generation instead of structured output
            simple_prompt = f"""Based on the research topic: {research_topic}
Research results summary: {len(truncated_results)} sources analyzed.
Please evaluate if this research is sufficient and respond in this exact JSON format:
{{
"is_sufficient": true,
"knowledge_gap": "Research appears comprehensive based on available sources",
"follow_up_queries": []
}}
Important: Respond only with valid JSON."""
            try:
                fallback_response = llm.invoke(simple_prompt)
                import json
                # Try to parse the JSON response
                response_text = fallback_response.content if hasattr(fallback_response, 'content') else str(fallback_response)
                # Extract the JSON part
                import re
                json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
                if json_match:
                    result_dict = json.loads(json_match.group())
                    # Build the Reflection object
                    result = Reflection(
                        is_sufficient=result_dict.get("is_sufficient", True),
                        knowledge_gap=result_dict.get("knowledge_gap", "Analysis completed with available data"),
                        follow_up_queries=result_dict.get("follow_up_queries", [])
                    )
                    print("✅ Fallback方案成功")
                else:
                    raise ValueError("无法解析JSON响应")
            except Exception as fallback_error:
                print(f"❌ Fallback方案也失败: {str(fallback_error)}")
                print("🛡️ 使用默认reflection结果")
                # Final fallback: a simple rule based on the number of results
                has_sufficient_results = len(web_research_results) >= 3
                result = Reflection(
                    is_sufficient=has_sufficient_results,
                    knowledge_gap="Analysis completed with available research data" if has_sufficient_results else "Limited research data available",
                    follow_up_queries=[] if has_sufficient_results else [f"additional information about {research_topic}"]
                )
                print(f"🛡️ 默认判断: sufficient={has_sufficient_results}, 基于{len(web_research_results)}个搜索结果")
    except Exception as e:
        error_message = f"Reflection node encountered critical error: {str(e)}"
        print(f"💥 {error_message}")
        # Emergency fallback: always consider current results sufficient to avoid flow interruption
        result = Reflection(
            is_sufficient=True,
            knowledge_gap="Analysis completed despite technical difficulties",
            follow_up_queries=[]
        )
        print("🚨 Using emergency fallback, marking as sufficient to continue flow")
    # Return updated state with reflection results
    return {
        "research_loop_count": state["research_loop_count"],
        "reflection_is_sufficient": result.is_sufficient,  # reflection verdict
        "reflection_knowledge_gap": result.knowledge_gap,  # identified knowledge gap
        "reflection_follow_up_queries": result.follow_up_queries,  # proposed follow-up queries
        "number_of_ran_queries": len(state.get("executed_search_queries", [])),
        "plan": state.get("plan", []),
        "current_task_pointer": state.get("current_task_pointer", 0)
    }
def evaluate_research_enhanced(state: OverallState, config: RunnableConfig) -> dict:
    """Combine the reflection verdict with content-enhancement results.

    This node only writes evaluation fields into the state; routing is done
    by a separate conditional-edge function.

    Args:
        state: Graph state. Reads ``research_loop_count``,
            ``reflection_is_sufficient``, ``reflection_follow_up_queries``,
            ``enhancement_status``, ``enhanced_sources_count``, ``plan`` and
            ``current_task_pointer``.
        config: Runnable config providing ``max_research_loops``.

    Returns:
        A dict with the ``evaluation_*`` keys: sufficiency flag, whether to
        continue, follow-up queries, completion flag and the number of
        enhanced sources (0 unless enhancement completed).
    """
    settings = Configuration.from_runnable_config(config)
    loops_done = state.get("research_loop_count", 0)
    loop_limit = settings.max_research_loops

    status = state.get("enhancement_status")
    enhanced_count = state.get("enhanced_sources_count", 0)
    enhancement_done = status == "completed"

    # Start from the reflection verdict; a successful enhancement may upgrade it.
    sufficient = state.get("reflection_is_sufficient", False)
    if not sufficient and enhancement_done and enhanced_count > 0:
        print(f"📈 内容增强完成 ({enhanced_count} 个源),提升充足性评估")
        # Each enhanced source contributes 0.3, capped at 0.8; 0.6 is the bar.
        if min(enhanced_count * 0.3, 0.8) >= 0.6:
            print(f" ✅ 基于内容增强结果,判定信息已充足")
            sufficient = True

    # Reuse the reflection's follow-ups, or synthesize one from the current task.
    queries = state.get("reflection_follow_up_queries", []) or []
    if not queries and not sufficient:
        tasks = state.get("plan", [])
        pointer = state.get("current_task_pointer", 0)
        if tasks and pointer < len(tasks):
            queries = [f"more details about {tasks[pointer]['description']}"]

    # Research stops when it is sufficient or the loop budget is exhausted.
    done = sufficient or loops_done >= loop_limit
    print(f"🏁 研究评估完成 - 充足性: {sufficient}, 循环次数: {loops_done}/{loop_limit}")
    if enhancement_done:
        print(f" 🔥 本轮包含Firecrawl内容增强: {enhanced_count} 个源")

    return {
        "evaluation_is_sufficient": sufficient,
        "evaluation_should_continue": not done,
        "evaluation_follow_up_queries": queries,
        "evaluation_research_complete": done,
        "evaluation_enhancement_boost": enhanced_count if enhancement_done else 0,
    }
def decide_next_research_step(state: OverallState):
    """Conditional edge: finish the current task or fan out follow-up searches.

    Args:
        state: Graph state. Reads the ``evaluation_*`` fields,
            ``plan``, ``current_task_pointer`` and ``number_of_ran_queries``.

    Returns:
        The string ``"record_task_completion"`` when research is complete (or
        there is nothing to follow up on), otherwise a list of ``Send``
        objects targeting ``"web_research"``, one per follow-up query.
    """
    keep_going = state.get("evaluation_should_continue", False)
    finished = state.get("evaluation_research_complete", False)
    if finished or not keep_going:
        print("🏁 研究流程完成,记录任务结果")
        return "record_task_completion"

    print("🔄 继续研究,执行follow-up查询")
    queries = state.get("evaluation_follow_up_queries", [])
    if not queries:
        print("⚠️ 没有follow-up查询,直接完成")
        return "record_task_completion"

    # Tag follow-up searches with the task they belong to.
    tasks = state.get("plan", [])
    pointer = state.get("current_task_pointer", 0)
    task_id = tasks[pointer]["id"] if tasks and pointer < len(tasks) else "unknown"

    print(f"🔄 生成 {len(queries)} 个follow-up查询")

    from langgraph.types import Send

    dispatches = []
    for offset, query in enumerate(queries):
        payload = {
            "search_query": query,
            # Continue numbering after the queries that already ran.
            "id": state.get("number_of_ran_queries", 0) + int(offset),
            "current_task_id": task_id,
        }
        dispatches.append(Send("web_research", payload))
    return dispatches
def finalize_answer(state: OverallState, config: RunnableConfig) -> dict:
    """Generate the final research report from all accumulated findings.

    All ledger entries and task-specific results are aggregated into a single
    research dataset, citation markers are converted to a readable form, the
    dataset is trimmed to a token budget, optionally enriched by report-level
    enhancement, and finally synthesized by one LLM call.

    Args:
        state: Graph state. Reads ``plan``, ``user_query``, ``ledger``,
            ``task_specific_results`` and ``sources_gathered``.
        config: Runnable config used to pick the model and passed on to the
            report-level enhancement step.

    Returns:
        A dict with ``messages`` (a single ``AIMessage``) and
        ``final_report_markdown``. On any failure both carry an error message
        instead of raising, so the graph still terminates.
    """
    try:
        configurable = Configuration.from_runnable_config(config)
        llm = ChatGoogleGenerativeAI(
            model=configurable.reflection_model,
            temperature=0.3,
            max_retries=2,
            api_key=os.getenv("GEMINI_API_KEY"),
        )
        plan = state.get("plan", [])
        user_query = state.get("user_query", "Research Analysis")
        if not plan:
            no_plan_message = "No research plan available to generate report"
            return {
                "messages": [AIMessage(content=no_plan_message)],
                "final_report_markdown": no_plan_message,
            }

        # Build comprehensive research dataset from all sources
        ledger = state.get("ledger", []) or []
        task_specific_results = state.get("task_specific_results", []) or []
        sources_gathered = state.get("sources_gathered", []) or []

        # Create research plan summary for context
        research_plan_summary = "\n".join(f"• {task['description']}" for task in plan)

        # Aggregate all research findings with proper attribution
        comprehensive_research_data = []

        # Ledger entries: summary, detailed snippets and up to five citations.
        for entry in ledger:
            detailed_snippets = entry.get('detailed_snippets', []) or []
            citations = entry.get('citations_for_snippets', []) or []
            # str() guards against non-string snippets aborting the whole report.
            snippet_text = "\n".join(str(snippet) for snippet in detailed_snippets)
            citation_text = "\n".join(
                f"- {str(cite.get('snippet', ''))[:200]}... [Source: {cite.get('source', 'Unknown')}]"
                for cite in citations[:5]
            )
            comprehensive_research_data.append(
                f"\nRESEARCH FOCUS: {entry.get('description', 'Unknown')}\n"
                f"KEY FINDINGS: {entry.get('findings_summary', '')}\n"
                "DETAILED RESEARCH CONTENT:\n"
                f"{snippet_text}\n"
                "SUPPORTING CITATIONS:\n"
                f"{citation_text}\n"
            )

        # Task-specific results: content plus up to three sources each.
        for result in task_specific_results:
            sources_info = ""
            if result.get('sources'):
                sources_list = [
                    f"- {source.get('title', 'Unknown')} ({source.get('url', 'N/A')})"
                    for source in result.get('sources', [])[:3]
                ]
                sources_info = "\nSOURCES:\n" + "\n".join(sources_list)
            comprehensive_research_data.append(
                f"\nRESEARCH STREAM: {result.get('task_id', 'Unknown')}\n"
                f"CONTENT: {result.get('content', '')}\n"
                f"TIMESTAMP: {result.get('timestamp', '')}{sources_info}\n"
            )

        # Build source mapping for citation conversion
        source_mapping = build_source_mapping(sources_gathered)

        # Combine all research data. The separator must be built before the
        # join: `"\n" + "=" * 80 + "\n".join(...)` only joined with "\n" and
        # emitted the rule line once, so sections were not delimited.
        section_separator = "\n" + "=" * 80 + "\n"
        research_dataset = section_separator + section_separator.join(comprehensive_research_data)

        # Convert citations to readable format
        research_dataset = convert_citations_to_readable(research_dataset, source_mapping)

        # Apply token limits to prevent API overload
        research_dataset_batches = split_by_tokens([research_dataset], max_tokens=120000)
        final_research_data = "\n\n".join(research_dataset_batches[0]) if research_dataset_batches else ""

        # REPORT-LEVEL ENHANCEMENT: best effort, never fatal.
        try:
            from agent.report_level_enhancement import integrate_report_enhancement_into_finalize

            # Convert sources_gathered to the format expected by report enhancement.
            # The same records are read with `label`/`value` keys by
            # build_source_mapping, so fall back to those when `title`/`url`
            # are absent instead of passing empty strings.
            available_sources = []
            for source in sources_gathered:
                if isinstance(source, dict):
                    available_sources.append({
                        'title': source.get('title') or source.get('label', ''),
                        'url': source.get('url') or source.get('value', ''),
                        'snippet': source.get('snippet', ''),
                    })

            print(f"🎯 启动报告级别增强分析...")
            enhanced_research_data, enhancement_results = integrate_report_enhancement_into_finalize(
                user_query=user_query,
                research_plan=plan,
                aggregated_research_data=final_research_data,
                available_sources=available_sources,
                config=config
            )

            # Use enhanced data if available
            final_research_data = enhanced_research_data

            # Log enhancement results
            successful_enhancements = [r for r in enhancement_results if r.success]
            if successful_enhancements:
                print(f"✅ 报告级别增强成功: {len(successful_enhancements)} 个增强点")
                for result in successful_enhancements:
                    print(f" - 质量: {result.enhancement_quality}, 源数量: {len(result.sources_used)}")
            else:
                print("ℹ️ 报告级别增强: 未执行或无有效增强")
        except Exception as e:
            # Continue with original data if enhancement fails
            print(f"⚠️ 报告级别增强异常,继续使用原始数据: {str(e)}")

        # Generate integrated report using the enhanced holistic approach
        formatted_prompt = integrated_report_instructions.format(
            user_query=user_query,
            research_plan_summary=research_plan_summary,
            comprehensive_research_data=final_research_data
        )
        print(f"🔄 Generating integrated report for: {user_query}")
        print(f"📊 Research data length: {len(final_research_data)} characters")
        print(f"📋 Tasks integrated: {len(plan)} research streams")

        # Generate the final integrated report
        integrated_report = llm.invoke(formatted_prompt).content

        # Apply final quality improvements
        integrated_report = clean_generated_content(integrated_report)
        integrated_report = remove_prompt_remnants(integrated_report)
        integrated_report = final_quality_check(integrated_report)
        print(f"✅ Integrated report generated: {len(integrated_report)} characters")
        return {
            "messages": [AIMessage(content=integrated_report)],
            "final_report_markdown": integrated_report
        }
    except Exception as e:
        error_message = f"Error generating integrated report: {str(e)}"
        print(f"❌ {error_message}")
        return {
            "messages": [AIMessage(content=error_message)],
            "final_report_markdown": error_message
        }
def build_source_mapping(sources_gathered):
    """Build a lookup from citation keys to source details for citation conversion.

    Two kinds of keys are produced per source:
      * the ID found after ``/id/`` in the source's ``short_url``;
      * the lower-cased, space-free domain name, when the source has a real
        (non ``vertexaisearch``) URL.

    Args:
        sources_gathered: Iterable of source dicts with optional ``value``
            (original URL), ``label`` and ``short_url`` keys. ``None`` and
            non-dict items are tolerated and skipped, so one malformed record
            cannot abort report generation.

    Returns:
        Dict mapping citation key -> ``{"label", "domain", "value"}``.
    """
    import re

    mapping = {}
    for source in sources_gathered or []:
        if not isinstance(source, dict):
            continue

        # Extract domain from URL for readable citation
        original_url = source.get("value") or ""
        domain = extract_domain(original_url)
        # An empty label would render as "[Source: ]"; fall back to the domain.
        label = source.get("label") or domain
        # Redirect URLs are not useful to readers, so they are never exposed.
        is_public_url = bool(original_url) and not original_url.startswith('https://vertexaisearch')

        short_url = source.get("short_url") or ""
        if short_url:
            # Extract ID from short URL
            id_match = re.search(r'/id/([^/]+)', short_url)
            if id_match:
                mapping[id_match.group(1)] = {
                    "label": label,
                    "domain": domain,
                    "value": original_url if is_public_url else "",
                }

        # Also map by domain so label-style citations can be resolved.
        if is_public_url:
            domain_key = domain.lower().replace(' ', '')
            mapping[domain_key] = {
                "label": label,
                "domain": domain,
                "value": original_url,
            }
    return mapping
def extract_domain(url):
    """Return a short, human-readable site name for ``url``.

    Returns ``"Unknown"`` for an empty URL, ``"Web Source"`` when no
    ``http(s)://`` host can be found, a fixed name for a few well-known
    sites, and otherwise the title-cased first label of the host.
    """
    import re

    if not url:
        return "Unknown"

    host_match = re.search(r'https?://(?:www\.)?([^/]+)', url)
    if host_match is None:
        return "Web Source"

    host = host_match.group(1)
    # Checked in order; the first marker contained in the host wins.
    well_known = (
        ("google.com", "Google"),
        ("wikipedia", "Wikipedia"),
        ("youtube", "YouTube"),
    )
    for marker, display_name in well_known:
        if marker in host:
            return display_name
    return host.split('.')[0].title()
def convert_citations_to_readable(content, source_mapping):
    """Rewrite raw citation markers in ``content`` as ``[Source: ...]`` tags.

    Args:
        content: Text containing ``[vertexaisearch.cloud.google.com/id/<id>]``
            markers and/or bare ``[<id>]`` markers (lower-case letters,
            digits and dashes).
        source_mapping: Dict of citation key -> source info with ``label``,
            ``domain`` and ``value`` entries.

    Returns:
        The text with markers replaced. Known keys become
        ``[Source: label (url)]`` when a usable URL exists, else
        ``[Source: label]``; unknown keys become ``[Source: <id>]``.
    """
    import re

    def _render(marker):
        key = marker.group(1)
        if key not in source_mapping:
            # Unknown key: keep the original ID visible.
            return f"[Source: {key}]"

        info = source_mapping[key]
        domain = info.get('domain', 'Unknown Source')
        url = info.get('value', '')
        label = info.get('label', domain)
        # Only expose real, verifiable URLs (never the redirect host).
        has_public_url = url and url.startswith('http') and 'vertexaisearch.cloud.google.com' not in url
        if has_public_url:
            return f"[Source: {label} ({url})]"
        return f"[Source: {label}]"

    marker_patterns = (
        # Vertex AI style markers first ...
        r'\[vertexaisearch\.cloud\.google\.com/id/([^\]]+)\]',
        # ... then any remaining bare lower-case markers.
        r'\[([a-z0-9\-]+)\]',
    )
    for pattern in marker_patterns:
        content = re.sub(pattern, _render, content)

    # Tidy up whatever malformed citation syntax is left over.
    return clean_malformed_citations(content)
def clean_malformed_citations(content):
    """Normalize broken citation syntax and return the cleaned text.

    Applies, in order: strip a trailing ``(https://vertexaisearch...)`` link
    from ``[Source: ...]`` tags, drop any other parenthesized vertexaisearch
    URL, and collapse ``]]`` into ``]``.
    """
    import re

    rewrites = (
        # [Source: domain](https://vertexaisearch...) -> [Source: domain]
        (r'\[Source: ([^\]]+)\]\(https://vertexaisearch\.cloud\.google\.com[^)]*\)', r'[Source: \1]'),
        # Leftover parenthesized redirect URLs
        (r'\(https://vertexaisearch\.cloud\.google\.com[^)]*\)', ''),
        # Double closing brackets
        (r'\]\]', ']'),
    )
    for pattern, replacement in rewrites:
        content = re.sub(pattern, replacement, content)
    return content
def clean_generated_content(content):
    """Strip the LLM's introductory meta-text from the start of generated content.

    Only the leading run of blank lines and "meta" lines (e.g. "Here is the
    report:", "以下是...") is removed. Everything after the first real line is
    kept verbatim. Previously every line starting with a meta prefix was
    dropped wherever it appeared, which deleted legitimate sentences such as
    "According to the IEA, ..."; blank lines and indentation were also
    removed, which merged Markdown paragraphs and flattened nested lists.

    Args:
        content: Generated text, or a falsy value (returned unchanged).

    Returns:
        The content without its leading preamble and without trailing
        whitespace.
    """
    if not content:
        return content

    # Common lead-in phrases produced by the model before the actual report.
    meta_prefixes = (
        "here is", "this is", "based on", "according to", "好的", "根据",
        "以下是", "here's", "below is", "following is",
    )

    lines = content.split('\n')
    first_real_line = 0
    while first_real_line < len(lines):
        candidate = lines[first_real_line].strip()
        if candidate and not candidate.lower().startswith(meta_prefixes):
            break
        first_real_line += 1

    return '\n'.join(lines[first_real_line:]).rstrip()
def remove_prompt_remnants(content):
    """Remove prompt fragments that leaked into generated content.

    Blocks that begin a line with ``INSTRUCTION(S):``, ``REQUIREMENT(S):`` or
    ``IMPORTANT:`` (case-insensitive) are removed up to the next blank line,
    the next line starting with a letter, or the end of the text. Lines that
    hold only a bullet or dash are emptied and runs of three or more newlines
    are collapsed.

    The keywords are anchored to the start of a line. Matching them anywhere
    truncated ordinary sentences such as "System requirements: 8GB RAM".

    Args:
        content: Generated text, or a falsy value (returned unchanged).

    Returns:
        The cleaned, stripped text.
    """
    import re

    if not content:
        return content

    # NOTE: IGNORECASE also applies to the [A-Z] look-ahead, so the removed
    # block ends at the next line that starts with any letter.
    block_flags = re.DOTALL | re.IGNORECASE | re.MULTILINE
    for keyword in (r'INSTRUCTIONS?', r'REQUIREMENTS?', r'IMPORTANT'):
        content = re.sub(
            r'^[ \t]*' + keyword + r':.*?(?=\n\n|\n[A-Z]|\Z)',
            '',
            content,
            flags=block_flags,
        )

    # Remove standalone bullets or dashes
    content = re.sub(r'^\s*[-•]\s*$', '', content, flags=re.MULTILINE)
    # Remove multiple consecutive line breaks
    content = re.sub(r'\n{3,}', '\n\n', content)
    return content.strip()
def final_quality_check(content):
    """Run the last formatting pass over the report, keeping citation URLs.

    Bare URLs are stripped from every line that is not a ``[Source: ...]``
    citation line containing ``http``; whitespace, punctuation-only lines,
    header spacing and citation spacing are then normalized.
    """
    import re

    def _drop_bare_urls(line):
        # Lines carrying a citation with a URL are preserved untouched.
        if '[Source:' in line and 'http' in line:
            return line
        return re.sub(r'\bhttps?://[^\s\[\]]+', '', line)

    text = '\n'.join(_drop_bare_urls(line) for line in content.split('\n'))

    # (pattern, replacement, flags) applied strictly in this order.
    normalizations = (
        (r'\n{3,}', '\n\n', 0),                        # collapse blank-line runs
        (r'[ \t]+', ' ', 0),                           # collapse spaces and tabs
        (r'^\s*[-.•]+\s*$', '', re.MULTILINE),         # punctuation-only lines
        (r'\n(#+[^\n]+)\n', r'\n\n\1\n\n', 0),         # blank lines around headers
        (r'\s+(\[Source:[^\]]+\])', r' \1', 0),        # single space before citations
    )
    for pattern, replacement, flags in normalizations:
        text = re.sub(pattern, replacement, text, flags=flags)

    # Final citation cleanup
    text = clean_malformed_citations(text)
    return text.strip()
def planner_node(state: OverallState, config: RunnableConfig) -> dict:
    """LangGraph node that turns the user's question into a multi-step research plan.

    Returns a dict with ``user_query``, ``plan`` (list of task dicts with
    ``id``, ``description``, ``info_needed``, ``source_hint`` and ``status``)
    and ``current_task_pointer`` reset to 0. If planning fails, a single
    catch-all task is returned instead.
    """

    def _plan_item(task_id, description, hint):
        # Every task starts out pending and is assumed to need information.
        return {
            "id": task_id,
            "description": description,
            "info_needed": True,
            "source_hint": hint,
            "status": "pending",
        }

    configurable = Configuration.from_runnable_config(config)
    planner_llm = ChatGoogleGenerativeAI(
        model=configurable.query_generator_model,
        temperature=0.7,
        max_retries=2,
        api_key=os.getenv("GEMINI_API_KEY"),
    ).with_structured_output(ResearchPlan)

    # Prefer an explicit user_query; otherwise derive it from the messages.
    user_query = state.get("user_query") or get_research_topic(state["messages"])
    formatted_prompt = planning_instructions.format(user_query=user_query)

    try:
        research_plan = planner_llm.invoke(formatted_prompt)
        tasks = [
            _plan_item(task.id, task.description, task.description)
            for task in research_plan.tasks
        ]
    except Exception as e:
        print(f"Planning failed: {e}")
        # Fall back to a single task that covers the whole question.
        tasks = [_plan_item("task-1", f"Research and answer: {user_query}", user_query)]

    return {
        "user_query": user_query,
        "plan": tasks,
        "current_task_pointer": 0,
    }
def record_task_completion_node(state: OverallState, config: RunnableConfig) -> dict:
    """Record the findings for the current task and advance to the next one.

    The incoming state is not mutated: the plan is returned as a new list in
    which only the current task is replaced by a copy marked ``completed``,
    and fallback findings are copied rather than aliased from the state.

    Args:
        state: Graph state. Reads ``plan``, ``current_task_pointer``,
            ``current_task_detailed_findings`` and ``web_research_result``.
        config: Runnable config forwarded to the summarization helper.

    Returns:
        A state update containing the new ledger entry, the task summary,
        the updated plan, the advanced pointer, cleared per-task findings and
        ``next_node_decision``. On error, a message plus
        ``next_node_decision == "end"``.
    """
    try:
        # Get current task info
        plan = state.get("plan", [])
        current_pointer = state.get("current_task_pointer", 0)
        if not plan or current_pointer >= len(plan):
            return {
                "messages": [AIMessage(content="Error: Invalid task pointer or empty plan")],
                "next_node_decision": "end"
            }
        current_task = plan[current_pointer]
        current_task_id = current_task.get("id")

        # Findings tagged with the current task id
        detailed_findings = state.get("current_task_detailed_findings", []) or []
        current_findings = [
            finding for finding in detailed_findings
            if finding.get("task_id") == current_task_id
        ]
        task_specific_findings = [finding["content"] for finding in current_findings]

        # If no task-specific findings found, try to get recent web results as fallback
        if not task_specific_findings:
            print(f"Warning: No task-specific findings found for task {current_task_id}, using recent web results as fallback")
            web_results = state.get("web_research_result", []) or []
            # Take the most recent results (assume they belong to current task).
            # list(...) ensures the ledger never shares a list with the state.
            task_specific_findings = list(web_results[-3:])

        # Generate task summary
        task_summary = _summarize_task_findings(
            current_task["description"],
            task_specific_findings,
            config
        )

        # Create citations from detailed findings that carry a source
        citations_for_snippets = [
            {"snippet": finding["content"], "source": str(finding["source"])}
            for finding in current_findings
            if finding.get("source")
        ]

        # Create ledger entry with detailed findings
        ledger_entry = {
            "task_id": current_task_id,
            "description": current_task["description"],
            "findings_summary": task_summary,
            "detailed_snippets": task_specific_findings,
            "citations_for_snippets": citations_for_snippets
        }

        # Update plan status on a copy instead of editing the state in place.
        updated_plan = list(plan)
        updated_plan[current_pointer] = {**current_task, "status": "completed"}

        next_pointer = current_pointer + 1
        return {
            "ledger": [ledger_entry],
            "global_summary_memory": [task_summary],
            "plan": updated_plan,
            "current_task_pointer": next_pointer,
            "current_task_detailed_findings": [],  # Clear for next task
            "next_node_decision": "continue" if next_pointer < len(plan) else "end"
        }
    except Exception as e:
        # NOTE(review): the pointer is not advanced on this path; confirm the
        # downstream router cannot re-enter the same task indefinitely.
        error_message = f"Error in record_task_completion_node: {str(e)}"
        print(error_message)
        return {
            "messages": [AIMessage(content=error_message)],
            "next_node_decision": "end"
        }
def _summarize_task_findings(task_description: str, web_results: List[str], config: RunnableConfig) -> str:
    """Summarize the research findings for one task in one or two sentences.

    Only the last three results are sent to the model. Returns a placeholder
    string when there are no results, and a generic completion message if
    the model call fails.
    """
    if not web_results:
        return f"No specific findings available for task: {task_description}"

    # Limit context to the three most recent results.
    context_to_summarize = "\n---\n".join(web_results[-3:])

    settings = Configuration.from_runnable_config(config)
    summarizer = ChatGoogleGenerativeAI(
        model=settings.reflection_model,
        temperature=0.3,
        max_retries=2,
        api_key=os.getenv("GEMINI_API_KEY"),
    )

    prompt = (
        f'Given the research task: "{task_description}"\n'
        "And the following research findings:\n"
        f"{context_to_summarize}\n"
        "Please provide a concise summary (1-2 sentences) of the key findings that directly address this specific task.\n"
        "Task Summary:"
    )

    try:
        reply = summarizer.invoke(prompt)
    except Exception as e:
        print(f"Task summarization failed: {e}")
        return f"Completed research for: {task_description}"

    if hasattr(reply, 'content'):
        return reply.content
    return str(reply)
def decide_next_step_in_plan(state: OverallState) -> str:
    """Conditional edge: route to the next plan task, or finalize when none remain.

    Returns ``"generate_query"`` while ``current_task_pointer`` is inside the
    plan, otherwise ``"finalize_answer"``.
    """
    pointer = state.get("current_task_pointer", 0)
    tasks = state.get("plan", [])

    if pointer >= len(tasks):
        print("--- All tasks completed. Finalizing answer ---")
        return "finalize_answer"

    print(f"--- Moving to next task (pointer: {pointer}) ---")
    return "generate_query"
# Create our Agent Graph.
# Flow: planner -> generate_query -> web_research -> reflection
#       -> (content_enhancement ->) evaluate_research_enhanced
#       -> web_research (follow-ups) | record_task_completion
#       -> generate_query (next task) | finalize_answer -> END
builder = StateGraph(OverallState, config_schema=Configuration)
# Define the nodes we will cycle between
builder.add_node("planner", planner_node)
builder.add_node("generate_query", generate_query)
builder.add_node("web_research", web_research)
builder.add_node("reflection", reflection)
builder.add_node("content_enhancement", content_enhancement_analysis)  # Enhanced content analysis node
builder.add_node("evaluate_research_enhanced", evaluate_research_enhanced)  # Enhanced research evaluation node
builder.add_node("record_task_completion", record_task_completion_node)  # Task completion recording node
builder.add_node("finalize_answer", finalize_answer)
# Set the entrypoint as `planner`
builder.add_edge(START, "planner")
builder.add_edge("planner", "generate_query")
# Add conditional edge to continue with search queries in a parallel branch
builder.add_conditional_edges(
    "generate_query", continue_to_web_research, ["web_research"]
)
# Reflect on the web research
builder.add_edge("web_research", "reflection")
# Modified routing logic after reflection - added intelligent content enhancement decision
builder.add_conditional_edges(
    "reflection",
    should_enhance_content,
    {
        "analyze_enhancement_need": "content_enhancement",
        "continue_without_enhancement": "evaluate_research_enhanced"
    }
)
# Enter evaluation phase after content enhancement completion
builder.add_edge("content_enhancement", "evaluate_research_enhanced")
# Decide next step after evaluation completion - continue research or complete task
builder.add_conditional_edges(
    "evaluate_research_enhanced",
    decide_next_research_step,
    ["web_research", "record_task_completion"]  # Can route to these two targets
)
# decide_next_research_step either returns "record_task_completion" or a list
# of Send objects that dispatch the follow-up queries straight to
# "web_research"; no separate follow-up node is wired in here.
# After recording task completion, decide next step in plan (multi-task loop)
builder.add_conditional_edges(
    "record_task_completion",
    decide_next_step_in_plan,
    ["generate_query", "finalize_answer"]
)
# Finalize the answer
builder.add_edge("finalize_answer", END)
graph = builder.compile(name="pro-search-agent")
def split_by_tokens(texts, max_tokens=150000, encoding_name="cl100k_base"):
    """Group texts into batches whose token counts stay within ``max_tokens``.

    Empty texts are skipped. A single text larger than 80% of the budget is
    first reduced to its key sections (target: 70% of the budget).

    Args:
        texts: Iterable of texts (``None`` is treated as empty).
        max_tokens: Token budget per batch.
        encoding_name: Name of the tiktoken encoding used for counting.

    Returns:
        A list of batches, each a list of texts. If the tokenizer cannot be
        obtained, batching falls back to a character estimate of four
        characters per token.
    """
    try:
        encoding = tiktoken.get_encoding(encoding_name)
    except Exception as exc:
        # `except ImportError` never fired here: a missing or unusable
        # tokenizer does not raise ImportError at this call site, so the
        # character-based fallback was unreachable.
        print(f"⚠️ Tokenizer unavailable ({exc}); falling back to character-based batching")
        # Fallback to simple character-based estimation: ~4 chars per token
        return simple_split_by_chars(texts or [], max_tokens * 4)

    batches = []
    current_batch = []
    current_tokens = 0
    for text in texts or []:
        if not text:
            continue
        text_tokens = len(encoding.encode(str(text)))

        # If single text is too large, intelligently extract key sections
        if text_tokens > max_tokens * 0.8:
            text = extract_key_sections(str(text), max_tokens * 0.7, encoding)
            text_tokens = len(encoding.encode(str(text)))

        # Check if adding this text would exceed the limit
        if current_tokens + text_tokens > max_tokens and current_batch:
            # Finalize current batch
            batches.append(current_batch)
            current_batch = [text]
            current_tokens = text_tokens
        else:
            current_batch.append(text)
            current_tokens += text_tokens

    # Add the last batch if it has content
    if current_batch:
        batches.append(current_batch)
    return batches
def extract_key_sections(content, max_tokens, encoding):
    """Select the most important paragraphs of ``content`` within a token budget.

    Paragraphs (split on blank lines) that look factual are taken first. A
    factual paragraph that no longer fits is truncated and included if it is
    critical, after which no further factual paragraphs are considered.
    Remaining budget is then filled with the other paragraphs in order.

    Args:
        content: Text to reduce; falsy values are returned unchanged.
        max_tokens: Token budget for the selected paragraphs.
        encoding: Object with an ``encode(text)`` method used for counting.

    Returns:
        The selected paragraphs joined by blank lines (factual ones first).
    """
    if not content:
        return content

    # Split content into sections
    sections = content.split('\n\n')
    key_sections = []
    tokens_used = 0
    priority_sections = []
    regular_sections = []

    # Categorize sections by importance
    for section in sections:
        if is_factual_section(section):
            priority_sections.append(section)
        else:
            regular_sections.append(section)

    # Add priority sections first
    for section in priority_sections:
        section_tokens = len(encoding.encode(section))
        if tokens_used + section_tokens <= max_tokens:
            key_sections.append(section)
            tokens_used += section_tokens
        elif is_critical_section(section):
            # For critical sections, truncate but include
            truncated = truncate_section(section, max_tokens - tokens_used, encoding)
            if truncated:
                key_sections.append(truncated)
                # Charge the truncated text against the budget; otherwise the
                # regular sections below could overshoot max_tokens.
                tokens_used += len(encoding.encode(truncated))
            break

    # Add regular sections if space allows
    for section in regular_sections:
        section_tokens = len(encoding.encode(section))
        if tokens_used + section_tokens <= max_tokens:
            key_sections.append(section)
            tokens_used += section_tokens
        else:
            break

    return '\n\n'.join(key_sections)
def is_factual_section(section):
    """Return True if the paragraph contains any marker of factual content.

    Markers are matched case-insensitively: four-digit numbers, dollar
    amounts, percentages, large-number words, business/event verbs and
    company-name suffixes.
    """
    import re

    factual_indicators = (
        r'\d{4}',                                      # Years
        r'\$[\d,]+',                                   # Money amounts
        r'\d+%',                                       # Percentages
        r'\d+\.?\d*\s*(million|billion|thousand)',     # Large numbers
        r'(acquired|purchased|bought|sold)',           # Business actions
        r'(announced|launched|released)',              # Event verbs
        r'[A-Z][a-z]+\s+(Inc|Corp|Ltd|Company)',       # Company names
    )
    return any(
        re.search(indicator, section, re.IGNORECASE) is not None
        for indicator in factual_indicators
    )
def is_critical_section(section):
    """Return True if the paragraph mentions a keyword that makes it worth keeping even when oversized."""
    critical_keywords = (
        'acquisition', 'merger', 'financial', 'revenue', 'profit',
        'strategy', 'impact', 'result', 'conclusion', 'summary',
    )
    lowered = section.lower()
    for keyword in critical_keywords:
        if keyword in lowered:
            return True
    return False
def truncate_section(section, max_tokens, encoding):
    """Keep the leading sentences of ``section`` that fit within ``max_tokens``.

    Sentences are split on ``". "`` and counted with ``encoding.encode``.
    Returns an empty string when the section is empty or not even the first
    sentence fits; a non-empty result always ends with a period.
    """
    if not section:
        return ""

    kept = []
    spent = 0
    for piece in section.split('. '):
        cost = len(encoding.encode(piece))
        if spent + cost > max_tokens:
            break
        kept.append(piece)
        spent += cost

    shortened = '. '.join(kept)
    if not shortened:
        return shortened
    return shortened if shortened.endswith('.') else shortened + '.'
def simple_split_by_chars(texts, max_chars):
    """Group texts into batches by character count (fallback batching strategy).

    A new batch is started whenever adding the next text would push a
    non-empty batch over ``max_chars``; a single oversized text still gets a
    batch of its own.
    """
    groups = []
    pending = []
    pending_size = 0
    for item in texts:
        size = len(str(item))
        if pending and pending_size + size > max_chars:
            # Close the full batch before placing the item.
            groups.append(pending)
            pending = []
            pending_size = 0
        pending.append(item)
        pending_size += size

    if pending:
        groups.append(pending)
    return groups
================================================
FILE: backend/src/agent/prompts.py
================================================
from datetime import datetime
# Get current date in a readable format
def get_current_date():
    """Return the current local date as "<month name> <2-digit day>, <4-digit year>"."""
    now = datetime.now()
    return format(now, "%B %d, %Y")
# Prompt template for the search-query generation step.
# Placeholders in the text: {number_queries}, {current_date}, {research_topic}
# (presumably substituted by the caller via str.format — confirm at the call site).
# NOTE(review): the worked examples hard-code the year 2024 even though
# {current_date} is supplied.
query_writer_instructions = """You are a **QueryGenerationAgent** responsible for creating comprehensive, targeted search queries.
=== TASK ===
Generate {number_queries} diverse, specific search queries that will gather detailed, comprehensive information about the research topic.
=== RESEARCH STRATEGY ===
1. **Specificity**: Create queries targeting specific aspects, data points, case studies, and technical details
2. **Multi-angle approach**: Cover different perspectives, time periods, and geographical regions
3. **Technical depth**: Include queries for technical specifications, implementation details, and performance metrics
4. **Data-focused**: Target queries likely to return statistical data, reports, and detailed analysis
5. **Source diversity**: Ensure queries will hit different types of sources (academic, industry, news, government)
=== QUERY QUALITY CRITERIA ===
Each query should:
- Target specific, actionable information rather than general overviews
- Include relevant technical terms and industry keywords
- Specify timeframes, locations, or scale when relevant
- Aim for sources likely to contain detailed data and analysis
- Be distinct enough to avoid duplicate information
=== EXAMPLES OF GOOD vs POOR QUERIES ===
Research Topic: "Smart city transportation trends 2024"
POOR (too general):
- "smart city transportation"
- "smart city trends 2024"
GOOD (specific and detailed):
- "smart city autonomous vehicle deployment statistics 2024"
- "IoT traffic management systems case studies major cities 2024"
- "smart city public transport electrification data Europe Asia 2024"
- "AI-powered traffic optimization ROI metrics smart cities 2024"
=== CURRENT RESEARCH CONTEXT ===
Current Date: {current_date}
Research Topic: {research_topic}
=== OUTPUT REQUIREMENTS ===
Generate exactly {number_queries} search queries that will maximize the collection of detailed, specific information.
Focus on queries that will return comprehensive data, technical details, case studies, and implementation specifics.
IMPORTANT: Return only the search queries in the specified JSON format."""
# Prompt template for the web-research step: asks for detailed extraction
# (statistics, implementations, case studies, trends, challenges) with citations.
# Placeholders in the text: {current_date}, {research_topic}
# (presumably substituted by the caller via str.format — confirm at the call site).
# NOTE(review): the text explicitly emphasizes 2024 developments regardless of
# the supplied {current_date}.
web_searcher_instructions = """You are a **WebResearcher** agent responsible for gathering and extracting detailed information from web searches.
=== TASK ===
Conduct targeted Google Searches to gather comprehensive, credible information about the research topic.
=== INFORMATION EXTRACTION STRATEGY ===
1. **Preserve original details**: Include specific data points, statistics, dates, and technical specifications
2. **Extract key facts**: Pull out concrete information, case studies, and implementation details
3. **Maintain source context**: Keep important quotes and specific findings from sources
4. **Include diverse perspectives**: Gather information from multiple source types and viewpoints
5. **Technical depth**: Extract implementation details, performance metrics, and technical specifications
=== CONTENT REQUIREMENTS ===
Your output should prioritize:
1. **Specific data points**: Numbers, percentages, dates, costs, performance metrics
2. **Concrete examples**: Real projects, case studies, implementation examples
3. **Technical details**: How technologies work, system architectures, integration approaches
4. **Current information**: Recent developments, 2024 trends, latest implementations
5. **Authoritative sources**: Government reports, research papers, industry analyses
=== OUTPUT FORMAT ===
Structure your findings as:
1. **Key Statistics and Data**: Present specific numbers, metrics, and quantitative findings
2. **Technology Implementations**: Describe specific systems, architectures, and technical approaches
3. **Case Studies and Examples**: Detail real-world implementations with concrete details
4. **Current Trends and Developments**: Latest innovations and market movements
5. **Challenges and Solutions**: Specific problems and technical solutions being implemented
=== QUALITY STANDARDS ===
- Include specific citations for each major point
- Preserve technical terminology and specifications
- Extract detailed implementation approaches
- Include performance benchmarks and comparative data
- Maintain chronological context (emphasize 2024 developments)
=== CURRENT RESEARCH CONTEXT ===
Current Date: {current_date}
Research Topic: {research_topic}
IMPORTANT: Focus on extracting and preserving detailed, specific information from search results rather than creating high-level summaries. The goal is to gather comprehensive raw information that can be used for detailed analysis."""
# Prompt template for the reflection step: judges whether the gathered
# summaries are detailed enough and, if not, requests follow-up queries.
# Placeholders in the text: {current_date}, {research_topic}, {summaries}.
# The doubled braces around the JSON example look like str.format escapes for
# literal braces (presumably the caller uses str.format — confirm).
# NOTE(review): the prose says "sufficient=true/false" while the requested JSON
# key is "is_sufficient"; the quality thresholds also hard-code 2024.
reflection_instructions = """You are a **ResearchAnalyst** agent responsible for evaluating research comprehensiveness and depth.
=== TASK ===
Analyze the provided research summaries to determine if they contain sufficient detail and breadth to answer the research question comprehensively.
=== EVALUATION FRAMEWORK ===
**SUFFICIENT RESEARCH** should include:
1. **Quantitative data**: Specific statistics, percentages, dollar amounts, dates
2. **Multiple perspectives**: Different geographical regions, market segments, or approaches
3. **Technical specifics**: Implementation details, technical specifications, performance metrics
4. **Current examples**: Recent case studies, pilot projects, deployed solutions
5. **Comprehensive coverage**: Multiple aspects of the research topic addressed
**EVALUATION CRITERIA**:
- **Comprehensive (sufficient=true)**: Rich with specific data, multiple examples, technical details, current information
- **Surface-level (sufficient=false)**: Lacks specific data, few concrete examples, missing technical depth
=== QUALITY THRESHOLDS ===
Mark as **sufficient=true** if the research includes:
- At least 5-8 specific data points or statistics
- Multiple concrete examples or case studies
- Technical implementation details
- Geographic or market diversity in examples
- Recent (2024) information and trends
Mark as **sufficient=false** only if research is clearly:
- Too high-level or conceptual
- Missing key technical aspects
- Lacking concrete examples or data
- Insufficient depth for comprehensive analysis
=== FOLLOW-UP QUERY STRATEGY ===
If research is insufficient, generate 3-5 targeted queries to fill specific gaps:
- Target missing data types (quantitative, technical, geographic)
- Focus on specific implementation details or metrics
- Address underrepresented aspects of the topic
=== OUTPUT FORMAT ===
Return a JSON object with these exact keys:
{{
"is_sufficient": true/false,
"knowledge_gap": "Specific description of what information is missing or insufficient",
"follow_up_queries": ["specific query 1", "specific query 2", ...]
}}
=== CURRENT RESEARCH CONTEXT ===
Current Date: {current_date}
Research Topic: {research_topic}
Research Summaries to Analyze:
{summaries}
IMPORTANT: Focus on whether the research provides sufficient detail and specificity for a comprehensive analysis, not whether it's "perfect"."""
# Prompt template for writing an executive-style consulting report from the
# collected research findings.
# Placeholders in the text: {research_topic}, {current_date}, {summaries}
# (presumably substituted by the caller via str.format — confirm at the call site).
answer_instructions = """You are a **Senior Research Analyst** at a leading global research consultancy firm. You are responsible for producing executive-level research reports for Fortune 500 clients.
=== PROFESSIONAL CONTEXT ===
Your audience consists of:
- C-suite executives and board members
- Strategic planners and business development teams
- Investment committees and venture capital firms
- Government policy makers and regulatory bodies
=== REPORT QUALITY STANDARDS ===
As a premium research consultancy, your reports must demonstrate:
- **Strategic insight**: Beyond data presentation to actionable intelligence
- **Market expertise**: Deep understanding of industry dynamics and competitive landscape
- **Executive focus**: Clear implications for business strategy and decision-making
- **Professional credibility**: Authoritative tone with rigorous methodology
=== REPORT STRUCTURE REQUIREMENTS ===
Your comprehensive report must include:
1. **Executive Summary** (2-3 paragraphs)
- Key findings and strategic implications
- Critical market trends and drivers
- Primary recommendations for stakeholders
2. **Methodology & Scope**
- Research approach and data sources
- Analysis framework and validation methods
- Limitations and scope of study
3. **Core Analysis Sections** (organized by research objectives)
- Market landscape and competitive dynamics
- Technology trends and innovation drivers
- Implementation case studies and best practices
- Challenges, barriers, and risk factors
4. **Strategic Implications & Recommendations**
- Business impact analysis
- Investment and policy recommendations
- Future outlook and emerging opportunities
5. **Conclusion & Next Steps**
- Summary of critical findings
- Strategic priorities for stakeholders
- Areas for continued monitoring
=== WRITING STYLE GUIDELINES ===
- **Authoritative but accessible**: Professional language without unnecessary jargon
- **Data-driven narratives**: Every claim supported by evidence and context
- **Strategic perspective**: Focus on "what this means" rather than just "what is"
- **Executive brevity**: Concise yet comprehensive coverage
- **Human insight**: Provide interpretation and judgment, not just data aggregation
=== CITATION & SOURCE STANDARDS ===
- Integrate sources naturally within the narrative flow
- Use professional attribution: "According to McKinsey research..." rather than [Source: mckinsey]
- Prioritize authoritative sources: industry reports, academic research, government data
- Provide context for data points: trends, comparisons, significance
=== OUTPUT FORMAT ===
Structure as a professional consulting report:
- Clear section headers with strategic focus
- Executive summary highlighting key insights
- Logical flow from analysis to implications
- Professional formatting with bullet points and subheadings
- Integrated citations that enhance credibility
=== CURRENT ASSIGNMENT ===
Research Topic: {research_topic}
Report Date: {current_date}
Research Findings:
{summaries}
IMPORTANT: Transform these research findings into a polished, executive-level report that demonstrates the analytical rigor and strategic insight expected from a top-tier consulting firm. Focus on delivering actionable intelligence rather than raw information compilation."""
# Prompt template for the planner: asks the model to split the user query into
# 2-5 research tasks and to return them as a JSON array wrapped in ```PLAN```
# fences, each item carrying id / description / info_needed / source_hint / status.
# Placeholder in the text: {user_query}.
# The doubled braces in the JSON examples look like str.format escapes for
# literal braces (presumably the caller uses str.format — confirm).
planning_instructions = """You are **PlannerAgent**. Your job is to analyze the user research query and break it down into multiple specific, executable research tasks.
=== TASK ANALYSIS PRINCIPLES ===
1. **Decompose complex queries**: Break broad topics into specific, manageable subtasks
2. **Identify key dimensions**: Extract different aspects, categories, or domains
3. **Create parallel tasks**: Generate 2-5 focused tasks that can be researched independently
4. **Ensure comprehensive coverage**: All important aspects should be covered
=== TASK BREAKDOWN STRATEGY ===
For research queries, consider these dimensions:
- **Domain separation**: Split different fields/industries (e.g., transportation vs energy)
- **Geographic scope**: Different regions or global vs local
- **Temporal focus**: Current trends vs future projections vs historical analysis
- **Technical depth**: Overview vs implementation details vs case studies
- **Stakeholder perspective**: Government, industry, technology, user impact
=== OUTPUT FORMAT ===
Return a single JSON array inside ```PLAN``` fences.
Each element must contain the following fields **in this order**:
{{
"id": "",
"description": "",
"info_needed": true | false,
"source_hint": "",
"status": "pending"
}}
=== PLANNING EXAMPLES ===
**Example 1**: User Query: "Research AI impact on healthcare"
```PLAN
[
{{
"id": "ai-diagnostics",
"description": "Research AI applications in medical diagnostics and imaging",
"info_needed": true,
"source_hint": "AI medical diagnostics imaging radiology machine learning healthcare 2024",
"status": "pending"
}},
{{
"id": "ai-treatment",
"description": "Research AI-driven treatment recommendations and drug discovery",
"info_needed": true,
"source_hint": "AI treatment recommendations drug discovery personalized medicine",
"status": "pending"
}},
{{
"id": "ai-healthcare-challenges",
"description": "Analyze challenges and ethical considerations of AI in healthcare",
"info_needed": true,
"source_hint": "AI healthcare ethics privacy challenges regulatory issues",
"status": "pending"
}}
]
```
**Example 2**: User Query: "Smart city transportation and energy trends 2024"
```PLAN
[
{{
"id": "smart-transportation-2024",
"description": "Research 2024 smart city transportation technologies and trends",
"info_needed": true,
"source_hint": "smart city transportation 2024 IoT traffic management autonomous vehicles",
"status": "pending"
}},
{{
"id": "smart-energy-2024",
"description": "Research 2024 smart city energy systems and sustainability trends",
"info_needed": true,
"source_hint": "smart city energy 2024 renewable smart grid energy management",
"status": "pending"
}},
{{
"id": "transport-energy-integration",
"description": "Analyze integration between smart transportation and energy systems",
"info_needed": true,
"source_hint": "smart city transport energy integration electric vehicles charging infrastructure",
"status": "pending"
}}
]
```
=== REQUIREMENTS ===
1. **Always create 2-5 tasks** (never just 1 unless the query is extremely specific)
2. **Each task should be focused and specific**
3. **Tasks should be complementary but independent**
4. **Use descriptive, actionable task descriptions**
5. **Provide targeted source hints for each task**
6. **Total top-level steps ≤ 5**
=== CURRENT RESEARCH QUERY ===
User Query: {user_query}
=== INSTRUCTIONS ===
Analyze the user query and break it down into specific research tasks. Focus on creating multiple focused tasks rather than one broad task. Output **only** the JSON array inside ```PLAN``` fences."""
# Prompt template for the final synthesis: merges findings from several
# research streams into one detailed report written in the language of the
# user's query.
# Placeholders in the text: {user_query}, {research_plan_summary},
# {comprehensive_research_data}
# (presumably substituted by the caller via str.format — confirm at the call site).
integrated_report_instructions = """You are a **Senior Research Director** at a premier global consulting firm, responsible for synthesizing complex multi-faceted research into cohesive strategic intelligence reports.
=== LANGUAGE ADAPTATION ===
**CRITICAL**: Respond in the SAME LANGUAGE as the original user query.
- If user query is in Chinese (中文), write the entire report in Chinese
- If user query is in English, write the entire report in English
- Maintain professional terminology and industry-specific language in the appropriate language
- Use native language conventions for citations, formatting, and professional writing
=== SYNTHESIS MISSION ===
Transform the provided research findings from multiple investigation streams into a unified, comprehensive professional analysis that reads as a single coherent narrative, not a collection of separate studies or high-level summaries.
=== INPUT CONTEXT ===
- Original Research Query: {user_query}
- Research Plan: {research_plan_summary}
- Complete Research Dataset: Multiple investigation streams with varying focus areas
- Target Audience: Industry professionals, researchers, business analysts requiring detailed insights
=== REPORT ARCHITECTURE PRINCIPLES ===
**1. DETAILED PROFESSIONAL ANALYSIS (Not Executive Summary)**
- Provide comprehensive, detailed analysis with specific data, metrics, and examples
- Include technical details, implementation specifics, and concrete case studies
- Present thorough research findings with supporting evidence and quantitative data
- Maintain depth and specificity throughout - this is NOT a summary document
**2. THEMATIC INTEGRATION WITH DEPTH**
- Organize by analytical themes while maintaining detailed coverage of each area
- Identify cross-cutting insights supported by specific evidence and data points
- Build narrative bridges between different aspects with concrete connections
- Present unified analysis while preserving the richness of detailed findings
**3. PROFESSIONAL RESEARCH STANDARDS**
- Comprehensive analysis with detailed methodology and findings
- Extensive use of specific data, statistics, and concrete examples
- Thorough coverage of technical aspects and implementation details
- Rich sourcing with complete, accurate, and verifiable citations
=== REQUIRED REPORT STRUCTURE ===
**COMPREHENSIVE OVERVIEW**
- Detailed introduction to the research scope and methodology
- Complete context setting with market sizing, key players, and current landscape
- Specific quantitative and qualitative indicators
- Thorough background establishing the foundation for detailed analysis
**DETAILED FINDINGS & ANALYSIS**
- In-depth analysis of each major research area with supporting data
- Specific technical details, implementation approaches, and case studies
- Comprehensive coverage of trends, technologies, and market dynamics
- Detailed examination of challenges, opportunities, and solution approaches
- Rich integration of cross-domain insights with specific supporting evidence
**TECHNICAL IMPLEMENTATIONS & CASE STUDIES**
- Detailed implementation examples with specific technical specifications
- Comprehensive case study analysis with concrete outcomes and metrics
- Thorough coverage of best practices and lessons learned
- Specific technology deployments, performance data, and success metrics
**MARKET DYNAMICS & COMPETITIVE LANDSCAPE**
- Detailed competitive analysis with specific market share data and positioning
- Comprehensive regulatory environment analysis with specific policy impacts
- Thorough investment landscape with specific funding amounts and trends
- Detailed stakeholder analysis with specific roles and influence patterns
**FUTURE PROJECTIONS & STRATEGIC IMPLICATIONS**
- Detailed forecasting with specific timelines and quantitative projections
- Comprehensive risk analysis with specific mitigation strategies
- Thorough opportunity assessment with concrete implementation pathways
- Detailed strategic recommendations with specific action items and resource requirements
=== SYNTHESIS GUIDELINES ===
**INTEGRATION WITH DEPTH:**
- Weave together detailed findings from different research streams naturally
- Maintain the richness and specificity of original research while showing connections
- Preserve technical details, specific data points, and concrete examples
- Build comprehensive understanding through detailed cross-domain analysis
**ENHANCED CITATION STANDARDS:**
- Preserve and integrate complete citation information throughout the narrative
- Use format: "According to [Specific Source/Study Name] (URL if available)..."
- Include specific study details, publication dates, and author/organization information
- Maintain all quantitative claims with specific source attribution
- Provide verifiable references that readers can follow up on
**PROFESSIONAL DEPTH:**
- Focus on comprehensive analysis rather than high-level strategic summaries
- Include technical specifications, implementation details, and operational insights
- Provide specific metrics, performance data, and concrete examples throughout
- Maintain the detailed, professional tone expected in industry research reports
**COMPREHENSIVE COVERAGE:**
- Ensure thorough coverage of all research areas with appropriate depth
- Include specific technical details, market data, and implementation examples
- Provide comprehensive context and background for all major topics
- Maintain professional research report standards with extensive detail and analysis
=== RESEARCH DATA TO SYNTHESIZE ===
{comprehensive_research_data}
=== CRITICAL INSTRUCTIONS ===
1. **LANGUAGE**: Write the ENTIRE report in the same language as the user query
2. **DEPTH**: This is a detailed professional research report, NOT an executive summary
3. **SPECIFICITY**: Include concrete data, metrics, examples, and technical details throughout
4. **INTEGRATION**: Unify findings while preserving the richness and depth of source material
5. **CITATIONS**: Maintain complete, accurate citations that readers can verify and follow
6. **COMPREHENSIVENESS**: Provide thorough coverage that satisfies professional research standards
OUTPUT: A comprehensive, detailed professional research report that integrates findings across all research areas while maintaining the depth, specificity, and professional rigor expected in industry research documentation."""
================================================
FILE: backend/src/agent/report_level_enhancement.py
================================================
"""
Report-Level Content Enhancement Module
During the final report generation phase, the LLM may discover it needs more in-depth specific
information to support its analysis. This module provides the capability to perform targeted
content enhancement during the report generation process.
"""
import os
from typing import Dict, List, Any, Optional, Tuple
from dataclasses import dataclass
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.runnables import RunnableConfig
from firecrawl import FirecrawlApp
@dataclass
class ReportEnhancementRequest:
    """A single request for extra information to strengthen the final report.

    Attributes:
        enhancement_type: Category of the request, e.g. "specific_data",
            "case_study", "technical_details" or "market_data".
        target_information: Specific description of the information needed.
        suggested_sources: Suggested source URLs.
        priority: Priority level, 1-5.
        reasoning: The LLM's explanation of why the information is needed.
    """

    enhancement_type: str
    target_information: str
    suggested_sources: List[str]
    priority: int
    reasoning: str
@dataclass
class ReportEnhancementResult:
    """Outcome of attempting one report enhancement.

    Attributes:
        success: Whether usable content was obtained.
        enhanced_content: The gathered text; empty when the attempt failed.
        sources_used: One dict per source that contributed content.
        enhancement_quality: Quality label: "excellent", "good", "fair" or
            "poor". ReportLevelEnhancer.execute_targeted_enhancement also
            stores "failed" here for unsuccessful attempts.
    """

    success: bool
    enhanced_content: str
    sources_used: List[Dict[str, Any]]
    enhancement_quality: str
class ReportLevelEnhancer:
    """Report-level content enhancer.

    Workflow: ask an LLM which information gaps remain before the report is
    written, pick already-known sources that look relevant to each gap, scrape
    them through Firecrawl, and grade the scraped text.

    ``firecrawl_app`` is None when the FIRECRAWL_API_KEY environment variable
    is unset; in that case ``execute_targeted_enhancement`` returns no results.
    """

    # Hint words looked up in a source's title/URL for each enhancement type.
    _TYPE_KEYWORDS = {
        'specific_data': ['data', 'statistics', 'report', 'research', 'study'],
        'case_study': ['case', 'example', 'implementation', 'deployment', 'success'],
        'technical_details': ['technical', 'specification', 'documentation', 'guide', 'manual'],
        'market_data': ['market', 'industry', 'competition', 'analysis', 'forecast'],
        'regulatory_info': ['regulation', 'policy', 'standard', 'compliance', 'legal'],
    }
    # URL fragments that earn the authority bonus.
    _AUTHORITY_MARKERS = ('.gov', '.edu', '.org')
    # Priority bounds documented on ReportEnhancementRequest (1-5).
    _MIN_PRIORITY = 1
    _MAX_PRIORITY = 5
    _DEFAULT_PRIORITY = 3

    def __init__(self):
        api_key = os.getenv("FIRECRAWL_API_KEY")
        self.firecrawl_app = FirecrawlApp(api_key=api_key) if api_key else None

    def analyze_report_enhancement_needs(
        self,
        user_query: str,
        research_plan: List[Dict],
        aggregated_research_data: str,
        config: "RunnableConfig"
    ) -> List["ReportEnhancementRequest"]:
        """Ask the LLM which extra information the report still needs.

        This is a pre-analysis step run before formal writing begins. Only the
        first 2000 characters of ``aggregated_research_data`` are shown to the
        model.

        Args:
            user_query: The original user question.
            research_plan: Task dicts; each task's 'description' is listed.
            aggregated_research_data: Research text collected so far.
            config: Runnable config from which the reflection model is read.

        Returns:
            Up to three parsed enhancement requests (possibly none).
        """
        plan_overview = chr(10).join(
            f"• {task.get('description', '')}" for task in research_plan
        )
        enhancement_analysis_prompt = f"""You are a professional research report writing expert. Before writing the final report, please analyze whether the current research data is sufficiently complete and identify additional in-depth information that may be needed.
User Query: {user_query}
Research Plan:
{plan_overview}
Current Research Data Overview:
{aggregated_research_data[:2000]}...
Please analyze the information adequacy in the following dimensions and identify areas that need deep enhancement:
1. **Specific Data & Statistics** - Is there sufficient quantitative data to support the analysis?
2. **Implementation Cases & Technical Details** - Are there specific implementation examples?
3. **Market Data & Competitive Analysis** - Is there latest market sizing and competitive landscape data?
4. **Policies, Regulations & Standards** - Is the relevant regulatory framework covered?
For each area that needs enhancement, please output in the following format:
**ENHANCEMENT_REQUEST_START**
Type: [specific_data|case_study|technical_details|market_data|regulatory_info]
Target: [What specific information is needed]
Priority: [1-5 number]
Reasoning: [Why this information is needed and how it will improve report quality]
Suggested_Sources: [Suggested website types or specific URLs if known that might have this information]
**ENHANCEMENT_REQUEST_END**
If current information is already sufficient, output: **NO_ENHANCEMENT_NEEDED**
Please identify only the most critical 1-3 enhancement needs to avoid over-complication.
"""
        # Imported here rather than at module level, as in the original code.
        from agent.configuration import Configuration
        configurable = Configuration.from_runnable_config(config)
        llm = ChatGoogleGenerativeAI(
            model=configurable.reflection_model,
            temperature=0.3,
            max_retries=2,
            api_key=os.getenv("GEMINI_API_KEY"),
        )
        response = llm.invoke(enhancement_analysis_prompt)
        analysis_text = response.content if hasattr(response, 'content') else str(response)
        return self._parse_enhancement_requests(analysis_text)

    def _parse_enhancement_requests(self, analysis_text: str) -> List["ReportEnhancementRequest"]:
        """Extract enhancement requests from the LLM's analysis text.

        Returns an empty list when the text contains NO_ENHANCEMENT_NEEDED.
        Blocks that fail to parse are reported and skipped. At most three
        requests are returned, in the order they appear.
        """
        requests = []
        if "NO_ENHANCEMENT_NEEDED" in analysis_text:
            return requests

        import re
        # Extract all enhancement request blocks
        pattern = r'\*\*ENHANCEMENT_REQUEST_START\*\*(.*?)\*\*ENHANCEMENT_REQUEST_END\*\*'
        for block in re.findall(pattern, analysis_text, re.DOTALL):
            try:
                request_data = self._parse_single_request(block)
                if request_data:
                    requests.append(request_data)
            except Exception as e:
                print(f"⚠️ Failed to parse enhancement request: {e}")
                continue
        return requests[:3]  # Maximum 3 requests

    @staticmethod
    def _field_value(line: str, label: str) -> str:
        """Return the text after a leading ``label``, without wrapping brackets.

        Only the label prefix is removed (not later occurrences in the line).
        A value fully wrapped in [...] is unwrapped, because the prompt shows
        its format placeholders in brackets and the model may echo them.
        """
        value = line[len(label):].strip()
        if len(value) >= 2 and value.startswith('[') and value.endswith(']'):
            value = value[1:-1].strip()
        return value

    def _parse_single_request(self, request_text: str) -> Optional["ReportEnhancementRequest"]:
        """Parse one request block made of 'Label: value' lines.

        Recognized labels: Type, Target, Priority, Reasoning, Suggested_Sources.
        Priority uses the first integer found on its line, clamped to 1-5;
        when no integer is present it defaults to 3.

        Returns:
            A ReportEnhancementRequest, or None when Type or Target is missing.
        """
        import re

        enhancement_type = ""
        target_information = ""
        priority = self._DEFAULT_PRIORITY
        reasoning = ""
        suggested_sources = []

        for raw_line in request_text.strip().split('\n'):
            line = raw_line.strip()
            if line.startswith('Type:'):
                enhancement_type = self._field_value(line, 'Type:')
            elif line.startswith('Target:'):
                target_information = self._field_value(line, 'Target:')
            elif line.startswith('Priority:'):
                # Tolerate decorated answers such as "4 (high)" or "[4]".
                number = re.search(r'\d+', self._field_value(line, 'Priority:'))
                if number:
                    priority = max(
                        self._MIN_PRIORITY,
                        min(self._MAX_PRIORITY, int(number.group())),
                    )
                else:
                    priority = self._DEFAULT_PRIORITY
            elif line.startswith('Reasoning:'):
                reasoning = self._field_value(line, 'Reasoning:')
            elif line.startswith('Suggested_Sources:'):
                sources_text = self._field_value(line, 'Suggested_Sources:')
                # Simple split, could be more complex in practice
                suggested_sources = [s.strip() for s in sources_text.split(',') if s.strip()]

        if enhancement_type and target_information:
            return ReportEnhancementRequest(
                enhancement_type=enhancement_type,
                target_information=target_information,
                suggested_sources=suggested_sources,
                priority=priority,
                reasoning=reasoning
            )
        return None

    @staticmethod
    def _extract_markdown(result: Any) -> Optional[str]:
        """Pull markdown text out of a scrape response.

        Accepts either a response object exposing ``success``/``markdown`` or a
        plain dict (optionally nesting the payload under 'data').

        Returns:
            The markdown text ('' when absent), or None when the response is
            empty or reports failure.
        """
        if not result:
            return None
        if isinstance(result, dict):
            if result.get('success') is False:
                return None
            payload = result.get('data')
            if not isinstance(payload, dict):
                payload = result
            return payload.get('markdown') or ''
        if not getattr(result, 'success', False):
            return None
        return getattr(result, 'markdown', None) or ''

    def execute_targeted_enhancement(
        self,
        enhancement_requests: List["ReportEnhancementRequest"],
        available_sources: List[Dict[str, Any]]
    ) -> List["ReportEnhancementResult"]:
        """Scrape matching sources for each request and collect the outcome.

        For every request, at most the two best-scoring sources are scraped.
        Content longer than 500 characters is accepted, and its first 2000
        characters are appended to the request's enhanced content.

        Requests with no matching source are skipped without a result entry.
        Requests whose scrapes yield nothing get a result with success=False
        and quality "failed".

        Returns:
            A list of ReportEnhancementResult; empty when Firecrawl is not
            configured.
        """
        if not self.firecrawl_app:
            print("⚠️ Firecrawl not configured, skipping report-level enhancement")
            return []

        results = []
        for request in enhancement_requests:
            print(f"🎯 Executing report-level enhancement: {request.enhancement_type}")
            print(f" Target information: {request.target_information}")

            # Find matching URLs
            target_urls = self._find_matching_urls(request, available_sources)
            if not target_urls:
                print(" ❌ No matching information sources found")
                continue

            # Attempt enhancement
            enhanced_content = ""
            sources_used = []
            for url_info in target_urls[:2]:  # Try at most 2 URLs
                try:
                    url = url_info.get('url', '')
                    if not url:
                        continue
                    print(f" 🔥 Scraping: {url_info.get('title', 'Unknown')}")
                    # NOTE(review): the `params=` call style matches firecrawl-py
                    # releases that return a plain dict, while `.success` /
                    # `.markdown` belong to response objects; both shapes are
                    # accepted below. Confirm against the pinned SDK version.
                    result = self.firecrawl_app.scrape_url(url, params={
                        'formats': ['markdown'],
                        'onlyMainContent': True,
                        'timeout': 30000
                    })
                    content = self._extract_markdown(result)
                    if content is None:
                        print(" ❌ Scraping failed")
                        continue
                    if len(content) > 500:  # Valid content
                        enhanced_content += f"\n\n### Source: {url_info.get('title', 'Unknown')}\n{content[:2000]}..."
                        sources_used.append({
                            'url': url,
                            'title': url_info.get('title', ''),
                            'content_length': len(content)
                        })
                        print(f" ✅ Success: {len(content)} characters")
                    else:
                        print(f" ⚠️ Content too short: {len(content)} characters")
                except Exception as e:
                    # Best effort: one failing URL must not abort the request.
                    print(f" ❌ Scraping exception: {str(e)}")
                    continue

            if enhanced_content and sources_used:
                quality = self._assess_enhancement_quality(enhanced_content, request)
                results.append(ReportEnhancementResult(
                    success=True,
                    enhanced_content=enhanced_content,
                    sources_used=sources_used,
                    enhancement_quality=quality
                ))
                print(f" ✅ Enhancement completed, quality: {quality}")
            else:
                results.append(ReportEnhancementResult(
                    success=False,
                    enhanced_content="",
                    sources_used=[],
                    enhancement_quality="failed"
                ))
                print(" ❌ Enhancement failed")
        return results

    def _find_matching_urls(
        self,
        request: "ReportEnhancementRequest",
        available_sources: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """Rank known sources by relevance to an enhancement request.

        Scoring per source, using lower-cased substring checks:
        +2 for each target word found in the title, +1 for each found in the
        URL, +1 for each type hint word found in the title or URL, and +3 when
        the URL contains '.gov', '.edu' or '.org'.

        A missing or None title/URL is treated as an empty string.
        ``request.suggested_sources`` is not consulted.

        Returns:
            Sources scoring at least 2, highest score first.
        """
        target_keywords = request.target_information.lower().split()
        type_keywords = self._TYPE_KEYWORDS.get(request.enhancement_type, [])

        scored_sources = []
        for source in available_sources:
            title = str(source.get('title') or '').lower()
            url = str(source.get('url') or '').lower()
            score = 0

            # Keyword matching
            for keyword in target_keywords:
                if keyword in title:
                    score += 2
                if keyword in url:
                    score += 1

            # Type matching
            for keyword in type_keywords:
                if keyword in title or keyword in url:
                    score += 1

            # Authority bonus
            if any(marker in url for marker in self._AUTHORITY_MARKERS):
                score += 3

            if score > 0:
                scored_sources.append((source, score))

        # Sort by score (stable, so ties keep their input order)
        scored_sources.sort(key=lambda pair: pair[1], reverse=True)
        return [source for source, score in scored_sources if score >= 2]

    def _assess_enhancement_quality(
        self,
        content: str,
        request: "ReportEnhancementRequest"
    ) -> str:
        """Grade scraped content by its length and target-word coverage.

        Returns:
            "excellent" (length > 2000 and coverage > 0.6),
            "good" (length > 1000 and coverage > 0.4),
            "fair" (length > 500 and coverage > 0.2), otherwise "poor".
            Empty content is always "poor".
        """
        if not content:
            return "poor"

        length = len(content)
        lowered_content = content.lower()  # lower-case once, not per keyword
        target_keywords = request.target_information.lower().split()

        # Keyword matching rate
        keyword_matches = sum(1 for keyword in target_keywords if keyword in lowered_content)
        keyword_ratio = keyword_matches / len(target_keywords) if target_keywords else 0

        # Length assessment
        if length > 2000 and keyword_ratio > 0.6:
            return "excellent"
        elif length > 1000 and keyword_ratio > 0.4:
            return "good"
        elif length > 500 and keyword_ratio > 0.2:
            return "fair"
        else:
            return "poor"
def integrate_report_enhancement_into_finalize(
    user_query: str,
    research_plan: List[Dict],
    aggregated_research_data: str,
    available_sources: List[Dict[str, Any]],
    config: RunnableConfig
) -> Tuple[str, List[ReportEnhancementResult]]:
    """Run report-level enhancement as part of the finalize_answer step.

    A fresh ReportLevelEnhancer first analyzes which information is missing.
    When nothing is requested, the research data is returned untouched with an
    empty result list. Otherwise the requests are executed and the content of
    every successful result is appended to the research data under a
    "## Report-Level Deep Enhancement" heading.

    Args:
        user_query: The original user question.
        research_plan: Task dicts describing the research plan.
        aggregated_research_data: Research text gathered so far.
        available_sources: Known sources that may be scraped.
        config: Runnable config forwarded to the analysis step.

    Returns:
        (enhanced_research_data, enhancement_results)
    """
    enhancer = ReportLevelEnhancer()

    # Step 1: find out what is missing.
    pending_requests = enhancer.analyze_report_enhancement_needs(
        user_query, research_plan, aggregated_research_data, config
    )
    if not pending_requests:
        print("✅ Report-level analysis: Current information is sufficient, no additional enhancement needed")
        return aggregated_research_data, []

    print(f"🎯 Identified {len(pending_requests)} report-level enhancement needs")
    for position, needed in enumerate(pending_requests, 1):
        print(f" {position}. {needed.enhancement_type}: {needed.target_information}")

    # Step 2: try to fill the gaps.
    enhancement_results = enhancer.execute_targeted_enhancement(
        pending_requests, available_sources
    )

    # Step 3: fold successful content into the research data.
    wins = [outcome for outcome in enhancement_results if outcome.success]
    if not wins:
        print("⚠️ Report-level enhancement did not yield effective content")
        return aggregated_research_data, enhancement_results

    appended = "\n".join(
        f"\n\n## Report-Level Deep Enhancement\n{outcome.enhanced_content}"
        for outcome in wins
    )
    print(f"✅ Report-level enhancement completed: {len(wins)} successful")
    return aggregated_research_data + "\n" + appended, enhancement_results
================================================
FILE: backend/src/agent/state.py
================================================
from __future__ import annotations
from dataclasses import dataclass, field
from typing import TypedDict, List, Optional, Dict, Any
from langgraph.graph import add_messages
from typing_extensions import Annotated
import operator
from dataclasses import dataclass, field
from typing_extensions import Annotated
class LedgerEntry(TypedDict):
    """Dictionary shape for the recorded findings of one task.

    Used as the element type of ``OverallState.ledger``.
    NOTE(review): ``tools_and_schemas.py`` defines a pydantic model with the
    same name but only the first three fields - confirm which one callers mean.
    """

    task_id: str  # Identifier of the task this entry belongs to
    description: str  # Description text of that task
    findings_summary: str  # The concise (1-2 sentence) LLM-generated summary for this task
    detailed_snippets: Optional[List[str]]  # List of relevant web_research_result strings
    citations_for_snippets: Optional[List[Dict[str, str]]]  # Maps snippets to sources
class OverallState(TypedDict):
    """Top-level state dictionary of the research agent.

    Several fields are declared as ``Annotated[..., operator.add]`` (or
    ``add_messages`` for ``messages``); that metadata names the function meant
    to merge successive updates to the field instead of overwriting it.
    NOTE(review): how and when the merge is applied is decided by LangGraph,
    not by this file - confirm against the LangGraph state/reducer docs.
    """

    messages: Annotated[list, add_messages]
    user_query: str  # Store original user question
    plan: list  # Store task plan generated by planner_node
    current_task_pointer: int  # Point to current task in plan
    executed_search_queries: Annotated[list, operator.add]  # Renamed from search_query
    web_research_result: Annotated[list, operator.add]
    sources_gathered: Annotated[list, operator.add]
    initial_search_query_count: int
    max_research_loops: int
    research_loop_count: int
    reasoning_model: str
    # --- Day 2 additions for multi-task iteration ---
    ledger: Annotated[List[LedgerEntry], operator.add]  # Records of completed task findings
    global_summary_memory: Annotated[List[str], operator.add]  # Cross-task memory accumulation
    # --- Day 3 additions for richer synthesis ---
    current_task_detailed_findings: Annotated[List[Dict[str, Any]], operator.add]  # Temporary storage for current task's detailed findings
    task_specific_results: Annotated[List[Dict[str, Any]], operator.add]  # Task-specific research results with task_id
    final_report_markdown: Optional[str]  # The final synthesized report
    # --- Reflection result fields ---
    reflection_is_sufficient: Optional[bool]  # Whether reflection judged the information sufficient
    reflection_knowledge_gap: Optional[str]  # Knowledge gap identified by reflection
    reflection_follow_up_queries: Optional[List[str]]  # Follow-up queries suggested by reflection
    number_of_ran_queries: Optional[int]  # Number of queries executed so far
    # --- Enhanced evaluation result fields ---
    evaluation_is_sufficient: Optional[bool]  # Whether the final evaluation judged the information sufficient
    evaluation_should_continue: Optional[bool]  # Whether research should continue
    evaluation_follow_up_queries: Optional[List[str]]  # Follow-up queries suggested by the evaluation
    evaluation_research_complete: Optional[bool]  # Whether research is complete
    evaluation_enhancement_boost: Optional[int]  # Boost contributed by content enhancement
    # --- Smart content enhancement fields ---
    enhancement_decision: Optional[Dict[str, Any]]  # Enhancement decision result
    enhancement_status: Optional[str]  # "skipped", "completed", "failed", "error", "analyzing"
    enhanced_content_results: Optional[List[Dict[str, Any]]]  # Firecrawl-enhanced content results
    enhanced_sources_count: Optional[int]  # Number of sources successfully enhanced
    enhancement_error: Optional[str]  # Error message from the enhancement process
class ReflectionState(TypedDict):
    """State shape describing the outcome of a reflection pass.

    Carries the sufficiency verdict, the described knowledge gap and the
    follow-up queries, together with loop/query counters and the plan position.
    NOTE(review): presumably produced by the reflection node in graph.py -
    verify against the caller.
    """

    is_sufficient: bool
    knowledge_gap: str
    # Declared with operator.add as merge metadata, like the list fields of OverallState.
    follow_up_queries: Annotated[list, operator.add]
    research_loop_count: int
    number_of_ran_queries: int
    plan: list
    current_task_pointer: int
class Query(TypedDict):
    """A single search query paired with the rationale for issuing it."""

    query: str
    rationale: str
class QueryGenerationState(TypedDict):
    """State shape holding a list of ``Query`` items plus the plan position.

    NOTE(review): presumably the output of the query-generation step - confirm
    against the node that returns it.
    """

    query_list: list[Query]
    plan: list
    current_task_pointer: int
class WebSearchState(TypedDict):
    """State shape for one web search: the query text plus two identifiers.

    NOTE(review): going by the field names, ``id`` identifies the search itself
    and ``current_task_id`` the plan task it serves - verify against the
    web-research node.
    """

    search_query: str
    id: str
    current_task_id: str
@dataclass(kw_only=True)
class SearchStateOutput:
running_summary: str = field(default=None) # Final report
================================================
FILE: backend/src/agent/tools_and_schemas.py
================================================
from typing import List
from pydantic import BaseModel, Field
# Schema for a batch of web-search queries plus the reasoning behind them.
# NOTE(review): presumably used as a structured-output schema for the LLM; the
# Field descriptions are runtime text and are intentionally left unchanged.
class SearchQueryList(BaseModel):
    query: List[str] = Field(
        description="A list of search queries to be used for web research."
    )
    rationale: str = Field(
        description="A brief explanation of why these queries are relevant to the research topic."
    )
# Schema for a reflection verdict: a sufficiency flag, a description of the
# knowledge gap, and the follow-up queries proposed to close it.
# NOTE(review): the field names mirror the first three fields of
# ReflectionState in state.py - confirm that the two are kept in sync.
class Reflection(BaseModel):
    is_sufficient: bool = Field(
        description="Whether the provided summaries are sufficient to answer the user's question."
    )
    knowledge_gap: str = Field(
        description="A description of what information is missing or needs clarification."
    )
    follow_up_queries: List[str] = Field(
        description="A list of follow-up queries to address the knowledge gap."
    )
# Schema for one research task: an identifier and a short description.
# ResearchPlan holds a list of these.
class ResearchTask(BaseModel):
    id: str = Field(description="Unique identifier for the task.")
    description: str = Field(description="A concise description of what this research task aims to achieve.")
# Schema for a research plan: a list of ResearchTask items.
# NOTE(review): the field description says tasks are "to be executed"; whether
# list order is the execution order is not visible here - confirm in graph.py.
class ResearchPlan(BaseModel):
    tasks: List[ResearchTask] = Field(description="A list of research tasks to be executed.")
# NOTE(review): state.py defines a TypedDict that is also named LedgerEntry and
# carries two extra fields (detailed_snippets, citations_for_snippets); check
# which of the two a given import refers to.
class LedgerEntry(BaseModel):
    """Record of completed task findings for the ledger."""
    task_id: str = Field(description="Unique identifier of the completed task")
    description: str = Field(description="Original task description")
    findings_summary: str = Field(description="Concise summary of key findings for this task")
================================================
FILE: backend/src/agent/utils.py
================================================
from typing import Any, Dict, List
from langchain_core.messages import AnyMessage, AIMessage, HumanMessage
def get_research_topic(messages: List[AnyMessage]) -> str:
    """
    Derive the research topic text from a conversation.

    Args:
        messages: The conversation so far.

    Returns:
        The ``content`` of the only message when there is exactly one;
        otherwise a transcript with one "User: ..." / "Assistant: ..." line per
        human / AI message, in order (an empty string for an empty list).
    """
    # A lone message is the topic itself - hand its content back untouched.
    if len(messages) == 1:
        return messages[-1].content

    # Multi-turn history: render a role-prefixed transcript. Messages that are
    # neither human nor AI contribute nothing.
    transcript_lines = []
    for msg in messages:
        if isinstance(msg, HumanMessage):
            transcript_lines.append(f"User: {msg.content}\n")
        elif isinstance(msg, AIMessage):
            transcript_lines.append(f"Assistant: {msg.content}\n")
    return "".join(transcript_lines)
def resolve_urls(urls_to_resolve: List[Any], id: int) -> Dict[str, str]:
    """
    Build an identity mapping over the URLs found in the search results.

    Each distinct ``site.web.uri`` is mapped to itself, so citations keep
    pointing at the real web source rather than at a synthetic internal URL.

    Args:
        urls_to_resolve: Objects exposing the URL as ``.web.uri``.
        id: Not used by this function; kept in the signature for callers.

    Returns:
        A dict of ``url -> url`` with duplicates collapsed, ordered by first
        appearance.
    """
    # Read every URI up front so a malformed entry fails before any mapping is built.
    original_urls = [entry.web.uri for entry in urls_to_resolve]
    # Repeated URLs simply rewrite the same key with the same value.
    return {link: link for link in original_urls}
def insert_citation_markers(text, citations_list):
    """
    Splice markdown citation links into ``text`` at each citation's end index.

    Args:
        text (str): The original text string.
        citations_list (list): Dictionaries with 'start_index', 'end_index'
                               and 'segments'. Each segment is a dict whose
                               'label' and 'short_url' are rendered as
                               " [label](short_url)". Indices refer to
                               positions in the original text.

    Returns:
        str: The text with citation markers inserted.
    """

    def _position(entry):
        # Order by where the marker goes, then by where the cited span starts.
        return (entry["end_index"], entry["start_index"])

    # Walk from the back of the text to the front: an insertion only shifts the
    # characters after it, so the original indices of the citations still to be
    # processed stay valid.
    annotated = text
    for entry in sorted(citations_list, key=_position, reverse=True):
        links = "".join(
            f" [{seg['label']}]({seg['short_url']})" for seg in entry["segments"]
        )
        cut = entry["end_index"]
        annotated = annotated[:cut] + links + annotated[cut:]
    return annotated
def get_citations(response, resolved_urls_map):
    """
    Extracts citation information from a Gemini model's response.

    Walks ``response.candidates[0].grounding_metadata.grounding_supports`` and
    builds one citation record per support that has a segment with an
    ``end_index``.

    Args:
        response: The response object from the Gemini model, expected to have
                  a structure including `candidates[0].grounding_metadata`.
        resolved_urls_map (dict): Maps a chunk URI to the URL to use in the
                  rendered link. URIs missing from the map fall back to the
                  URI itself so a link never renders with a ``None`` target.

    Returns:
        list: A list of dictionaries, where each dictionary represents a citation
              and has the following keys:
              - "start_index" (int): The starting character index of the cited
                                     segment. Defaults to 0 if not specified.
              - "end_index" (int): The segment's end index, taken from the
                                   response as-is (no adjustment is applied).
              - "segments" (list[dict]): One entry per usable grounding chunk
                                     with "label" (chunk title up to its first
                                     "."), "short_url" (mapped URL) and "value"
                                     (the chunk's original URI).
              Returns an empty list if no valid candidates or grounding supports
              are found, or if essential data is missing.
    """
    citations = []

    # Ensure response and necessary nested structures are present
    if not response or not response.candidates:
        return citations

    candidate = response.candidates[0]
    metadata = getattr(candidate, "grounding_metadata", None)
    # grounding_supports may be absent *or* None; both mean "nothing to cite".
    supports = getattr(metadata, "grounding_supports", None) if metadata else None
    if not supports:
        return citations

    # A missing/None chunk list is treated as empty so lookups below just skip.
    chunks = getattr(metadata, "grounding_chunks", None) or []

    for support in supports:
        segment = getattr(support, "segment", None)
        if segment is None:
            continue  # Skip this support if segment info is missing

        # end_index is where the marker gets inserted; without it the
        # citation cannot be placed.
        if segment.end_index is None:
            continue

        citation = {
            "start_index": segment.start_index if segment.start_index is not None else 0,
            "end_index": segment.end_index,
            "segments": [],
        }

        for ind in getattr(support, "grounding_chunk_indices", None) or []:
            try:
                web = chunks[ind].web
                uri = web.uri
                title = web.title
                # "aljazeera.com" -> "aljazeera"; a title without any dot is
                # used whole instead of being dropped.
                label = title.split(".")[0]
            except (IndexError, AttributeError, TypeError):
                # Best effort: a chunk that is out of range or lacks web/uri/
                # title information yields no link, the rest are kept.
                continue
            if not title:
                continue  # An empty title gives nothing to display as link text
            citation["segments"].append(
                {
                    "label": label,
                    "short_url": resolved_urls_map.get(uri, uri),
                    "value": uri,
                }
            )
        citations.append(citation)
    return citations
================================================
FILE: backend/test-agent.ipynb
================================================
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from agent import graph\n",
"\n",
"state = graph.invoke({\"messages\": [{\"role\": \"user\", \"content\": \"Who won the euro 2024\"}], \"max_research_loops\": 3, \"initial_search_query_count\": 3})"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'messages': [HumanMessage(content='Who won the euro 2024', additional_kwargs={}, response_metadata={}, id='4b0ccc12-2e74-4a55-a85e-c512e7867c26'),\n",
" AIMessage(content=\"Spain won the UEFA Euro 2024 tournament [youtube](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGFcidniPKtBR-_QjSR1P1Oathq_0T9FTwfpCAWZxbXsroItHQU8zRcyOPDgMcvsWoD2fEnwYFKwanV18ep2_cyS5BlHF6-OFNsijWb-peAgsgLAVRiubekRnzMugsYtiWrhZyO3Q==) [aljazeera](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEk7ApC7Y41UOrTWJ40wP2rsT0VDxqhqF-WJEI-FNKW7SNpR7LoA22sRQecS8hZNeZ_-62Vh7X75RmcmZUtnAOuQunrLAsETkkSx5l75dt9ESgTRkIURwtu4Pew7hn8yFz_LY_FJXUpmRfoWP7MWrDfPHcKrOpfmKqONj6mJcASNvAfCZ0p6qK3K4PvKWye6NyBMyYxWCuJig==) [foxsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHh_4hBL0Giyuw_cyfT8m7tUSnMqBqH4Lis1CtJICPJNGGLhT6PADTIoUtrj3Rl5qcKNE9T6rzOmedAER_gxJOBDrCF8pnr9lUvhYvmDJxYCJzELkE5rTap4dx6FzOIKZKm1QBp5aHXzd_LCkSTV9ag7Q1A6_t8Vjdbskch6ZG3BoIfjYDQSPgRKDNFAAwt5J07cVFV5pDQzggmM7pxwsUz4drz) [wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGd9ZQky3X7RQLbTs6mY1i4Pg7ppcI5H_vtxpvQPiEyD8Qw0f7hjvn3QeoOeAVcCG_pEt5Aeu8ofWCgjwQy4_u6qU-NOOJsYPWOW94XcvtkmKiv46vbNkJF-Mb4OpvBztrDa28BfIdCGHdfF9o=) [youtube](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGZc-qDhRx_v3mPelXEfAVmWCpNTa_rzUKundc0pRc7PlTgppymao-_wO7O1oPaAhJYLcZkazIg8T5jA6t9OGgOxUd_Vl88BjouHsot0OK8TlM5hmPf4ECMWGeJthqVwndE3h4wdQ==) [uefa](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXG1Lj9FnmuckfU0k1NC_ThQBZVxFCppp4tPl4FCcM3JZGF9aPvn9ZNFUo0fLfqw4Adt63Cdv8thcFSbsBRcf3rj1sz4LALJvrGfh6OayGo0KJ-UEKmKoOz8cxj5nIILCzKjFh2_0ZgTwrf1pkhhYbnWqj2E8hrVN4S5_sxvlCpLXPxjTsE4R0gYKXH_utqqm1NBkpl3p-C9v6kz-zm6V-JJoePAppIXFICF0DMYjOIBA9Mj0z4yO9Y9Tdgx2oaP) [aljazeera](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFY5CRvcfjdkBz3h8Md_PscguyZ7LtYrxeHHP3eagcmIOnjaMyZbOHFqUAsa2cgkwvb26FZTvGiRgLKNLfiAsH1oP-5kGwnL6Ejhm4ZXhWGg0R3yE_8zkIKde4RgjIXlBvQW4kZ-LI5yhag-ESoh771z6hob8AigAVXT7WeWABMlQNfcbyG_UZIkqAs18U5e6to44ruNbSyDIyd5gobsVpEmdU256oVxa9d7co=) 
[coachesvoice](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHxgpkZWF64tZ8-iypkI2fiFi2cpsj4AFjZXkcYUzf5hSOWYb5etIbCoZd_L6zDJi6mWWisxAO6T5V4T8H7XiRow6dmVqXpSEIKhPSdG0HAQbQK74lwxeV_uXx9fSPllIKPOs2tFNRqTuHdJBNcwpcJp6MJbVLEskyhYnWlyOd9ouQv) [aljazeera](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEV-g6Hxxcan5Xre1yYGM3BtP3fo9uF2zHQ9sVeK_4poD-aBN5CRvhz471beYCC26wdrjhtbiCvDT9dAnPI-ruyqJZhwB3vbKS5HCFb9tPn7Dkj99LpjLXqYyuzbFGsHCbr5SCHoMEhNg--dMU7xB5TiH8HeqKH8B4lk_h00dqhEVQFb05w5TuLtbX1UdXN6NDzHlFN_xyXzOU=) [wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFNtaBQTFVnSbEW5Bbo8LUIs0h5cv4Pc4aS6Q8qG7jIMCsJPKy5_o6R8x7Z_xQ7AuDEAFlj2JY_AVV1YpwLqtXZxiAyvpfboH_VuMpo6MVbQAu2ZASSSD2slWaIqsUGkTEaPa2z2809z7UhEWUL).\\n\\nIn the final match held in Berlin, Germany, Spain defeated England 2-1 [olympics](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFARil0pwjYQuFrDObawlDzu-eVtUPC4_nINjcXT-mlTL3MDgVPI83UB8gWS1rzGZkaMEmAUIeAzo2ihpMXUsWibzVzeAdQ7nUyqAOq0En87kpfuISduBuWI3__7yJw-vmdApD56-_G2ZhhZC4d_ll2iyNBaZHxxdNqXbb76mUiq99xV0hdoPEkp9RLk7T-uYYfTYXa8oYCXy2ysa9SZDa9hffEHrVe) [aljazeera](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEk7ApC7Y41UOrTWJ40wP2rsT0VDxqhqF-WJEI-FNKW7SNpR7LoA22sRQecS8hZNeZ_-62Vh7X75RmcmZUtnAOuQunrLAsETkkSx5l75dt9ESgTRkIURwtu4Pew7hn8yFz_LY_FJXUpmRfoWP7MWrDfPHcKrOpfmKqONj6mJcASNvAfCZ0p6qK3K4PvKWye6NyBMyYxWCuJig==) [foxsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHh_4hBL0Giyuw_cyfT8m7tUSnMqBqH4Lis1CtJICPJNGGLhT6PADTIoUtrj3Rl5qcKNE9T6rzOmedAER_gxJOBDrCF8pnr9lUvhYvmDJxYCJzELkE5rTap4dx6FzOIKZKm1QBp5aHXzd_LCkSTV9ag7Q1A6_t8Vjdbskch6ZG3BoIfjYDQSPgRKDNFAAwt5J07cVFV5pDQzggmM7pxwsUz4drz) 
[aljazeera](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFY5CRvcfjdkBz3h8Md_PscguyZ7LtYrxeHHP3eagcmIOnjaMyZbOHFqUAsa2cgkwvb26FZTvGiRgLKNLfiAsH1oP-5kGwnL6Ejhm4ZXhWGg0R3yE_8zkIKde4RgjIXlBvQW4kZ-LI5yhag-ESoh771z6hob8AigAVXT7WeWABMlQNfcbyG_UZIkqAs18U5e6to44ruNbSyDIyd5gobsVpEmdU256oVxa9d7co=) [coachesvoice](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHxgpkZWF64tZ8-iypkI2fiFi2cpsj4AFjZXkcYUzf5hSOWYb5etIbCoZd_L6zDJi6mWWisxAO6T5V4T8H7XiRow6dmVqXpSEIKhPSdG0HAQbQK74lwxeV_uXx9fSPllIKPOs2tFNRqTuHdJBNcwpcJp6MJbVLEskyhYnWlyOd9ouQv) [aljazeera](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEV-g6Hxxcan5Xre1yYGM3BtP3fo9uF2zHQ9sVeK_4poD-aBN5CRvhz471beYCC26wdrjhtbiCvDT9dAnPI-ruyqJZhwB3vbKS5HCFb9tPn7Dkj99LpjLXqYyuzbFGsHCbr5SCHoMEhNg--dMU7xB5TiH8HeqKH8B4lk_h00dqhEVQFb05w5TuLtbX1UdXN6NDzHlFN_xyXzOU=) [wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFNtaBQTFVnSbEW5Bbo8LUIs0h5cv4Pc4aS6Q8qG7jIMCsJPKy5_o6R8x7Z_xQ7AuDEAFlj2JY_AVV1YpwLqtXZxiAyvpfboH_VuMpo6MVbQAu2ZASSSD2slWaIqsUGkTEaPa2z2809z7UhEWUL). Nico Williams scored the opening goal for Spain, and Mikel Oyarzabal scored the winning goal [youtube](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGFcidniPKtBR-_QjSR1P1Oathq_0T9FTwfpCAWZxbXsroItHQU8zRcyOPDgMcvsWoD2fEnwYFKwanV18ep2_cyS5BlHF6-OFNsijWb-peAgsgLAVRiubekRnzMugsYtiWrhZyO3Q==) [aljazeera](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEk7ApC7Y41UOrTWJ40wP2rsT0VDxqhqF-WJEI-FNKW7SNpR7LoA22sRQecS8hZNeZ_-62Vh7X75RmcmZUtnAOuQunrLAsETkkSx5l75dt9ESgTRkIURwtu4Pew7hn8yFz_LY_FJXUpmRfoWP7MWrDfPHcKrOpfmKqONj6mJcASNvAfCZ0p6qK3K4PvKWye6NyBMyYxWCuJig==) [foxsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHh_4hBL0Giyuw_cyfT8m7tUSnMqBqH4Lis1CtJICPJNGGLhT6PADTIoUtrj3Rl5qcKNE9T6rzOmedAER_gxJOBDrCF8pnr9lUvhYvmDJxYCJzELkE5rTap4dx6FzOIKZKm1QBp5aHXzd_LCkSTV9ag7Q1A6_t8Vjdbskch6ZG3BoIfjYDQSPgRKDNFAAwt5J07cVFV5pDQzggmM7pxwsUz4drz). 
Cole Palmer scored England's only goal [olympics](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFARil0pwjYQuFrDObawlDzu-eVtUPC4_nINjcXT-mlTL3MDgVPI83UB8gWS1rzGZkaMEmAUIeAzo2ihpMXUsWibzVzeAdQ7nUyqAOq0En87kpfuISduBuWI3__7yJw-vmdApD56-_G2ZhhZC4d_ll2iyNBaZHxxdNqXbb76mUiq99xV0hdoPEkp9RLk7T-uYYfTYXa8oYCXy2ysa9SZDa9hffEHrVe) [aljazeera](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEk7ApC7Y41UOrTWJ40wP2rsT0VDxqhqF-WJEI-FNKW7SNpR7LoA22sRQecS8hZNeZ_-62Vh7X75RmcmZUtnAOuQunrLAsETkkSx5l75dt9ESgTRkIURwtu4Pew7hn8yFz_LY_FJXUpmRfoWP7MWrDfPHcKrOpfmKqONj6mJcASNvAfCZ0p6qK3K4PvKWye6NyBMyYxWCuJig==) [foxsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHh_4hBL0Giyuw_cyfT8m7tUSnMqBqH4Lis1CtJICPJNGGLhT6PADTIoUtrj3Rl5qcKNE9T6rzOmedAER_gxJOBDrCF8pnr9lUvhYvmDJxYCJzELkE5rTap4dx6FzOIKZKm1QBp5aHXzd_LCkSTV9ag7Q1A6_t8Vjdbskch6ZG3BoIfjYDQSPgRKDNFAAwt5J07cVFV5pDQzggmM7pxwsUz4drz).\\n\\nThis victory marked Spain's record fourth European Championship title [youtube](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGFcidniPKtBR-_QjSR1P1Oathq_0T9FTwfpCAWZxbXsroItHQU8zRcyOPDgMcvsWoD2fEnwYFKwanV18ep2_cyS5BlHF6-OFNsijWb-peAgsgLAVRiubekRnzMugsYtiWrhZyO3Q==) [aljazeera](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEk7ApC7Y41UOrTWJ40wP2rsT0VDxqhqF-WJEI-FNKW7SNpR7LoA22sRQecS8hZNeZ_-62Vh7X75RmcmZUtnAOuQunrLAsETkkSx5l75dt9ESgTRkIURwtu4Pew7hn8yFz_LY_FJXUpmRfoWP7MWrDfPHcKrOpfmKqONj6mJcASNvAfCZ0p6qK3K4PvKWye6NyBMyYxWCuJig==) [foxsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHh_4hBL0Giyuw_cyfT8m7tUSnMqBqH4Lis1CtJICPJNGGLhT6PADTIoUtrj3Rl5qcKNE9T6rzOmedAER_gxJOBDrCF8pnr9lUvhYvmDJxYCJzELkE5rTap4dx6FzOIKZKm1QBp5aHXzd_LCkSTV9ag7Q1A6_t8Vjdbskch6ZG3BoIfjYDQSPgRKDNFAAwt5J07cVFV5pDQzggmM7pxwsUz4drz) 
[wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGd9ZQky3X7RQLbTs6mY1i4Pg7ppcI5H_vtxpvQPiEyD8Qw0f7hjvn3QeoOeAVcCG_pEt5Aeu8ofWCgjwQy4_u6qU-NOOJsYPWOW94XcvtkmKiv46vbNkJF-Mb4OpvBztrDa28BfIdCGHdfF9o=) [youtube](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGZc-qDhRx_v3mPelXEfAVmWCpNTa_rzUKundc0pRc7PlTgppymao-_wO7O1oPaAhJYLcZkazIg8T5jA6t9OGgOxUd_Vl88BjouHsot0OK8TlM5hmPf4ECMWGeJthqVwndE3h4wdQ==) [uefa](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXG1Lj9FnmuckfU0k1NC_ThQBZVxFCppp4tPl4FCcM3JZGF9aPvn9ZNFUo0fLfqw4Adt63Cdv8thcFSbsBRcf3rj1sz4LALJvrGfh6OayGo0KJ-UEKmKoOz8cxj5nIILCzKjFh2_0ZgTwrf1pkhhYbnWqj2E8hrVN4S5_sxvlCpLXPxjTsE4R0gYKXH_utqqm1NBkpl3p-C9v6kz-zm6V-JJoePAppIXFICF0DMYjOIBA9Mj0z4yO9Y9Tdgx2oaP) [aljazeera](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFY5CRvcfjdkBz3h8Md_PscguyZ7LtYrxeHHP3eagcmIOnjaMyZbOHFqUAsa2cgkwvb26FZTvGiRgLKNLfiAsH1oP-5kGwnL6Ejhm4ZXhWGg0R3yE_8zkIKde4RgjIXlBvQW4kZ-LI5yhag-ESoh771z6hob8AigAVXT7WeWABMlQNfcbyG_UZIkqAs18U5e6to44ruNbSyDIyd5gobsVpEmdU256oVxa9d7co=) [coachesvoice](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHxgpkZWF64tZ8-iypkI2fiFi2cpsj4AFjZXkcYUzf5hSOWYb5etIbCoZd_L6zDJi6mWWisxAO6T5V4T8H7XiRow6dmVqXpSEIKhPSdG0HAQbQK74lwxeV_uXx9fSPllIKPOs2tFNRqTuHdJBNcwpcJp6MJbVLEskyhYnWlyOd9ouQv) [aljazeera](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEV-g6Hxxcan5Xre1yYGM3BtP3fo9uF2zHQ9sVeK_4poD-aBN5CRvhz471beYCC26wdrjhtbiCvDT9dAnPI-ruyqJZhwB3vbKS5HCFb9tPn7Dkj99LpjLXqYyuzbFGsHCbr5SCHoMEhNg--dMU7xB5TiH8HeqKH8B4lk_h00dqhEVQFb05w5TuLtbX1UdXN6NDzHlFN_xyXzOU=) [wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFNtaBQTFVnSbEW5Bbo8LUIs0h5cv4Pc4aS6Q8qG7jIMCsJPKy5_o6R8x7Z_xQ7AuDEAFlj2JY_AVV1YpwLqtXZxiAyvpfboH_VuMpo6MVbQAu2ZASSSD2slWaIqsUGkTEaPa2z2809z7UhEWUL). 
Spain achieved this by winning all seven of their matches throughout the tournament [youtube](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFgwKo5lPes5M_GObnkYEzn3QYn1kpTQpx42ANaNqvNMgRsB1Xp2TIXI82SYTSYuLd9ysgKfmlJJy3lcLxrmNBg1R_Z37PCO9vbqIBIbw6DKqMif7pHdtDTS7FUq69c29hkYb_b5w==) [wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGd9ZQky3X7RQLbTs6mY1i4Pg7ppcI5H_vtxpvQPiEyD8Qw0f7hjvn3QeoOeAVcCG_pEt5Aeu8ofWCgjwQy4_u6qU-NOOJsYPWOW94XcvtkmKiv46vbNkJF-Mb4OpvBztrDa28BfIdCGHdfF9o=) [youtube](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGZc-qDhRx_v3mPelXEfAVmWCpNTa_rzUKundc0pRc7PlTgppymao-_wO7O1oPaAhJYLcZkazIg8T5jA6t9OGgOxUd_Vl88BjouHsot0OK8TlM5hmPf4ECMWGeJthqVwndE3h4wdQ==) [uefa](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXG1Lj9FnmuckfU0k1NC_ThQBZVxFCppp4tPl4FCcM3JZGF9aPvn9ZNFUo0fLfqw4Adt63Cdv8thcFSbsBRcf3rj1sz4LALJvrGfh6OayGo0KJ-UEKmKoOz8cxj5nIILCzKjFh2_0ZgTwrf1pkhhYbnWqj2E8hrVN4S5_sxvlCpLXPxjTsE4R0gYKXH_utqqm1NBkpl3p-C9v6kz-zm6V-JJoePAppIXFICF0DMYjOIBA9Mj0z4yO9Y9Tdgx2oaP) [wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFNtaBQTFVnSbEW5Bbo8LUIs0h5cv4Pc4aS6Q8qG7jIMCsJPKy5_o6R8x7Z_xQ7AuDEAFlj2JY_AVV1YpwLqtXZxiAyvpfboH_VuMpo6MVbQAu2ZASSSD2slWaIqsUGkTEaPa2z2809z7UhEWUL) [newsbytesapp](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFIl5Xc3f44I1nYw_YrJqkByrRl20SiAopZqjfJIK6U62o27CrxLvxaJ4v1M7L5eOfTMMlBCHHYCUooPoG0aObaeRG3YxrcoFT7Xtd4KIrvCS6AWWRpOZasCW-sGtFA56DEDf-qbJ8lsXEJ4GQ386iGTdRkyK9EtJWw1mRpDu7dfPQ6Qy1hNIqTgTdo-3yq1WNmWEl8Xtnag0s=).\\n\\nKey individual awards for the tournament went to Spain's players: Rodri was named the Best Player, and Lamine Yamal was named the Best Young Player 
[wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEk7ApC7Y41UOrTWJ40wP2rsT0VDxqhqF-WJEI-FNKW7SNpR7LoA22sRQecS8hZNeZ_-62Vh7X75RmcmZUtnAOuQunrLAsETkkSx5l75dt9ESgTRkIURwtu4Pew7hn8yFz_LY_FJXUpmRfoWP7MWrDfPHcKrOpfmKqONj6mJcASNvAfCZ0p6qK3K4PvKWye6NyBMyYxWCuJig==0) [bet9ja](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFgj0MP_IEmC842xTfmMPnbybBGYTUb_wEpwJ58keX5x_qPfUmC7Zz0o6IQeQ8TEqoRpv-Uq6oOqfbazu_aP0fMhP7UrSln6rB4SRvCRC327tM1LNaXpiXN-h6xlg0TN_-AWQORV4PSH7G5u2qD_NaNEWkz_oaEHxj22-qOam52fwRvqISOdoFDNTptlM6t0BbhcA==) [uefa](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGKGygrv0aVjWa7JUdwqtuttcPxVIiVFb2_Mxv32q-4AyOVwd8oMKLXq6sl2kw4A37lHLmUUQYqVfDMkX3DLXr4or1Xpx1lnOpIUanPjOtrr2Hk6tPPc0308hdE0xJ5CClC220Tz30xD6538_DOvrVWqfA7pV7x651519Zz37wgqYhN00Ah3LX4QZnW981_-SM8tjVSLDXutPphZBXXmMehNgUynvNd2IiGB9UtkLyGeWINIqR2F7lejStuXJ8U2Q==) [beinsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXExRli0zGmQZlemPPItRH3qShabB-QVHrgUAECeXIs3GUKgd2oIHd45-ULY--TosnkRkiM-XHqZlPxeQlOV6Ktgxb-L5r9Hhf8M-nQS_T0N7NK0BeynreRZtFivuKzwwOByq6uALzoVtombjsREMmsPG7s07CMlMrQjyJCVX8McNdnGC7-mdlHEjdfXN4sgi-YGxdxCdAxaHUaMQxPL0GUUmqDzMMpzVC_lRnrYfuk17UhXI9QhsEi3TMeuUgHu3kl16g1mHA==) [thehindu](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEAlCtejIOwzHPUOAXi7oLu469wYzGUJN86oxtrB6YCAHKAocfkxog6XZeXOUjAl9MTY2_jU5igYEOpyy5RZV2jhxGHtahvQGi8Bq0XkJmaFvludGqwpuBn-vFf-MR3As1CXu9GZNh0TW5f3eLPgvDjB6N3IoYaGhGT8BUiqSyZS6k41T-vL9h6fEFMoOFUYhG2S0AfuVZDuyF2nJHJP1WVWZS42csWXEJUDxqhYjyzmx33HaCxKk0Rbe3_Ovc_Kgdagw==).\", additional_kwargs={}, response_metadata={}, id='4c4aa673-391d-48b2-954a-9fcb7053c634')],\n",
" 'search_query': ['Euro 2024 winner',\n",
" \"What were Spain's key team performance statistics throughout Euro 2024?\",\n",
" 'What specific stats or performances led to Rodri being named Euro 2024 Best Player?',\n",
" 'What specific stats or performances led to Lamine Yamal being named Euro 2024 Best Young Player?'],\n",
" 'web_research_result': [\"Spain won the UEFA Euro 2024, securing their record fourth title [youtube](https://vertexaisearch.cloud.google.com/id/0-0) [aljazeera](https://vertexaisearch.cloud.google.com/id/0-1) [foxsports](https://vertexaisearch.cloud.google.com/id/0-2) [wikipedia](https://vertexaisearch.cloud.google.com/id/0-3) [youtube](https://vertexaisearch.cloud.google.com/id/0-4) [uefa](https://vertexaisearch.cloud.google.com/id/0-5). The final match was held in Berlin, Germany, where Spain defeated England 2-1 [olympics](https://vertexaisearch.cloud.google.com/id/0-6) [aljazeera](https://vertexaisearch.cloud.google.com/id/0-1) [foxsports](https://vertexaisearch.cloud.google.com/id/0-2). Spain's Nico Williams scored the opening goal, and Mikel Oyarzabal scored the winning goal [youtube](https://vertexaisearch.cloud.google.com/id/0-0) [aljazeera](https://vertexaisearch.cloud.google.com/id/0-1) [foxsports](https://vertexaisearch.cloud.google.com/id/0-2). England's Cole Palmer scored their lone goal [olympics](https://vertexaisearch.cloud.google.com/id/0-6) [aljazeera](https://vertexaisearch.cloud.google.com/id/0-1) [foxsports](https://vertexaisearch.cloud.google.com/id/0-2).\\n\\nSpain won all seven of their matches in the tournament [youtube](https://vertexaisearch.cloud.google.com/id/0-7) [wikipedia](https://vertexaisearch.cloud.google.com/id/0-3) [youtube](https://vertexaisearch.cloud.google.com/id/0-4) [uefa](https://vertexaisearch.cloud.google.com/id/0-5). In the quarter-finals, Spain defeated Germany 2-1 after extra time [olympics](https://vertexaisearch.cloud.google.com/id/0-6) [wikipedia](https://vertexaisearch.cloud.google.com/id/0-3). In the semi-finals, Spain beat France 2-1 [olympics](https://vertexaisearch.cloud.google.com/id/0-6) [aljazeera](https://vertexaisearch.cloud.google.com/id/0-8) [wikipedia](https://vertexaisearch.cloud.google.com/id/0-3). 
Lamine Yamal became the youngest player to score in a UEFA European Championship [ndtv](https://vertexaisearch.cloud.google.com/id/0-9) [uefa](https://vertexaisearch.cloud.google.com/id/0-5).\\n\\nThe top scorers of the tournament were Harry Kane, Georges Mikautadze, Jamal Musiala, Cody Gakpo, Ivan Schranz and Dani Olmo, each with 3 goals [wikipedia](https://vertexaisearch.cloud.google.com/id/0-10). Rodri was named best player and Lamine Yamal best young player of the tournament [wikipedia](https://vertexaisearch.cloud.google.com/id/0-10). Luis de la Fuente was the coach who led Spain to victory [transfermarkt](https://vertexaisearch.cloud.google.com/id/0-11).\\n\",\n",
" \"Spain won Euro 2024, defeating England 2-1 in the final to secure their record fourth European Championship [aljazeera](https://vertexaisearch.cloud.google.com/id/1-0) [coachesvoice](https://vertexaisearch.cloud.google.com/id/1-1) [aljazeera](https://vertexaisearch.cloud.google.com/id/1-2) [wikipedia](https://vertexaisearch.cloud.google.com/id/1-3). They won all seven of their matches in the competition [wikipedia](https://vertexaisearch.cloud.google.com/id/1-3).\\n\\nHere's a summary of Spain's key team performance statistics throughout Euro 2024:\\n\\n**General Stats:**\\n\\n* **Goals Scored:** Spain scored 15 goals throughout the tournament, setting a new record for most goals in a single European Championship [wikipedia](https://vertexaisearch.cloud.google.com/id/1-3). They scored 13 goals before the final [thehindu](https://vertexaisearch.cloud.google.com/id/1-4) [newsbytesapp](https://vertexaisearch.cloud.google.com/id/1-5).\\n* **Goals Conceded:** Spain conceded only three goals in the tournament [thehindu](https://vertexaisearch.cloud.google.com/id/1-4) [newsbytesapp](https://vertexaisearch.cloud.google.com/id/1-5).\\n* **Wins:** Spain had a 100% win record in Euro 2024 [newsbytesapp](https://vertexaisearch.cloud.google.com/id/1-5). They won all six of their matches leading up to the final [aljazeera](https://vertexaisearch.cloud.google.com/id/1-2) [sportsmole](https://vertexaisearch.cloud.google.com/id/1-6).\\n* **Clean Sheets:** Spain had three clean sheets in Euro 2024 [thehindu](https://vertexaisearch.cloud.google.com/id/1-4) [thehindu](https://vertexaisearch.cloud.google.com/id/1-7).\\n* **Possession:** Spain averaged 57.3% possession during the tournament [thehindu](https://vertexaisearch.cloud.google.com/id/1-4). 
They often maintained possession for over 65% of their matches [spanishprofootball](https://vertexaisearch.cloud.google.com/id/1-8).\\n* **Passing Accuracy:** Spain had a passing accuracy of 90% [thehindu](https://vertexaisearch.cloud.google.com/id/1-4).\\n* **Ball Recoveries:** Spain led the tournament in ball recoveries with 255 [thehindu](https://vertexaisearch.cloud.google.com/id/1-4).\\n* **Shots:** Spain had 80 shots (excluding blocks), with 38 on target [newsbytesapp](https://vertexaisearch.cloud.google.com/id/1-5). They had the most attempts in Euro 2024, with 108, 37 of which were on target [thehindu](https://vertexaisearch.cloud.google.com/id/1-4).\\n* **Chances Created:** Spain created 85 chances [newsbytesapp](https://vertexaisearch.cloud.google.com/id/1-5).\\n* **Tackles:** Spain made 92 tackles [newsbytesapp](https://vertexaisearch.cloud.google.com/id/1-5).\\n\\n**Team Composition and Tactics:**\\n\\n* The squad featured a blend of experienced players and young talents [totalfootballanalysis](https://vertexaisearch.cloud.google.com/id/1-9).\\n* Luis de la Fuente employed multifaceted tactics, adapting to different opponents [totalfootballanalysis](https://vertexaisearch.cloud.google.com/id/1-10).\\n* Spain dominated possession and controlled the tempo of matches [spanishprofootball](https://vertexaisearch.cloud.google.com/id/1-8).\\n* They utilized a high pressing strategy and quick recovery [spanishprofootball](https://vertexaisearch.cloud.google.com/id/1-8).\\n* Fluid midfield dynamics were powered by players like Pedri, Rodri, and Gavi [spanishprofootball](https://vertexaisearch.cloud.google.com/id/1-8).\\n\\n**Individual Player Stats:**\\n\\n* **Dani Olmo:** Joint leading goal scorer with three goals [thehindu](https://vertexaisearch.cloud.google.com/id/1-4) [newsbytesapp](https://vertexaisearch.cloud.google.com/id/1-5). 
He also provided two assists [newsbytesapp](https://vertexaisearch.cloud.google.com/id/1-5).\\n* **Lamine Yamal:** Joint assist leader with three assists [thehindu](https://vertexaisearch.cloud.google.com/id/1-4) [thehindu](https://vertexaisearch.cloud.google.com/id/1-7). He also became the youngest-ever Euros scorer [sportsmole](https://vertexaisearch.cloud.google.com/id/1-6) [wikipedia](https://vertexaisearch.cloud.google.com/id/1-3).\\n* **Rodri:** Completed the most passes for Spain [thehindu](https://vertexaisearch.cloud.google.com/id/1-4) [thehindu](https://vertexaisearch.cloud.google.com/id/1-7).\\n* **Aymeric Laporte:** Recovered the ball the most number of times for Spain defensively [thehindu](https://vertexaisearch.cloud.google.com/id/1-4).\\n* **Unai Simon:** Conceded three goals and made 12 saves in five matches [thehindu](https://vertexaisearch.cloud.google.com/id/1-4).\\n* **Nico Williams:** Named Man of the Match in the final [wikipedia](https://vertexaisearch.cloud.google.com/id/1-3).\\n\\nSpain's coach, Luis de la Fuente, emphasized versatility, pace on the wings, control in the middle, and a solid defense as key to their balance [coachesvoice](https://vertexaisearch.cloud.google.com/id/1-1).\\n\",\n",
" 'Rodri was named Euro 2024 Best Player due to his consistent and brilliant performances throughout the tournament [bet9ja](https://vertexaisearch.cloud.google.com/id/2-0). He was the centerpiece of Spain\\'s midfield, playing a crucial role in nearly every game [europeanchampionship2024](https://vertexaisearch.cloud.google.com/id/2-1). Here\\'s a breakdown of the specific stats and performances that led to the award:\\n\\n* **Key Role in Spain\\'s Victories:** Rodri played a crucial role in Spain\\'s victories over Germany and France [bet9ja](https://vertexaisearch.cloud.google.com/id/2-0).\\n* **Midfield Dominance:** Rodri\\'s consistent presence in midfield was pivotal for Spain [europeanchampionship2024](https://vertexaisearch.cloud.google.com/id/2-1).\\n* **Only Goal:** He scored a goal in Spain\\'s 4-1 win over Georgia in the Last 16 [indiatimes](https://vertexaisearch.cloud.google.com/id/2-2) [bet9ja](https://vertexaisearch.cloud.google.com/id/2-0).\\n* **Passing Accuracy:** Rodri had a remarkable passing accuracy of 92.84% [uefa](https://vertexaisearch.cloud.google.com/id/2-3) [mancity](https://vertexaisearch.cloud.google.com/id/2-4) [uefa](https://vertexaisearch.cloud.google.com/id/2-5). Only Aymeric Laporte completed more passes for Spain with 411 passes [mancity](https://vertexaisearch.cloud.google.com/id/2-4).\\n* **Ball Recoveries:** Rodri was also pivotal when out of possession, with just one other midfielder registering more ball recoveries than the Spaniard\\'s 33 [mancity](https://vertexaisearch.cloud.google.com/id/2-4).\\n* **Leadership:** He led his team with distinction [europeanchampionship2024](https://vertexaisearch.cloud.google.com/id/2-1). Rodri\\'s leadership on the field helped integrate young talents [bet9ja](https://vertexaisearch.cloud.google.com/id/2-0).\\n* **Strategic Rest:** He started in six of Spain\\'s seven matches, only sitting out the final group stage game against Slovakia, which Spain won 1-0. 
This strategic rest allowed Rodri to stay fresh for the knockout stages [upthrust](https://vertexaisearch.cloud.google.com/id/2-6).\\n* **Calmness Under Pressure:** Rodri\\'s calmness under pressure was a recurring theme throughout the tournament [upthrust](https://vertexaisearch.cloud.google.com/id/2-6).\\n* **Dictating Tempo:** His ability to dictate the tempo of the game, coupled with his defensive prowess, made Rodri indispensable [upthrust](https://vertexaisearch.cloud.google.com/id/2-6).\\n* **Orchestration:** Rodri\\'s orchestration was crucial in maintaining possession and preventing Germany from gaining momentum in the quarter-final [upthrust](https://vertexaisearch.cloud.google.com/id/2-6).\\n* **Midfield Control:** His performance against France in the semi-finals was another masterclass in midfield control [upthrust](https://vertexaisearch.cloud.google.com/id/2-6).\\n* **Composure and Strategic Thinking:** Rodri\\'s composure and strategic thinking brought a sense of reliability to Spain\\'s gameplay [upthrust](https://vertexaisearch.cloud.google.com/id/2-6).\\n* **Impact in the Final:** Despite his early exit due to a hamstring injury in the final against England, Rodri\\'s presence in the first half helped Spain establish control and set the tone for the rest of the match [upthrust](https://vertexaisearch.cloud.google.com/id/2-6).\\n\\nLuis de la Fuente, the coach of the Spanish team, described Rodri as a \"perfect computer\" due to his precise passing and exceptional understanding of the game [indiatimes](https://vertexaisearch.cloud.google.com/id/2-2) [bet9ja](https://vertexaisearch.cloud.google.com/id/2-0). UEFA\\'s team of technical observers at EURO 2024 also recognized Rodri\\'s influence in central midfield [uefa](https://vertexaisearch.cloud.google.com/id/2-7).\\n',\n",
" \"Lamine Yamal was named Euro 2024 Young Player of the Tournament due to several outstanding achievements [uefa](https://vertexaisearch.cloud.google.com/id/3-0) [beinsports](https://vertexaisearch.cloud.google.com/id/3-1) [thehindu](https://vertexaisearch.cloud.google.com/id/3-2). He played in all seven of Spain's Euro 2024 matches, starting in six of them [uefa](https://vertexaisearch.cloud.google.com/id/3-0). He became the youngest player ever to play in the tournament when he started against Croatia at 16 years, 338 days old [uefa](https://vertexaisearch.cloud.google.com/id/3-0) [uefa](https://vertexaisearch.cloud.google.com/id/3-3). In the semi-final against France, he scored a remarkable goal, making him the youngest goalscorer in Euros history at 16 years, 362 days [wikipedia](https://vertexaisearch.cloud.google.com/id/3-4) [uefa](https://vertexaisearch.cloud.google.com/id/3-0) [uefa](https://vertexaisearch.cloud.google.com/id/3-3) [beinsports](https://vertexaisearch.cloud.google.com/id/3-1) [thehindu](https://vertexaisearch.cloud.google.com/id/3-2). Furthermore, he provided four assists during the tournament [wikipedia](https://vertexaisearch.cloud.google.com/id/3-4) [thehindu](https://vertexaisearch.cloud.google.com/id/3-5) [beinsports](https://vertexaisearch.cloud.google.com/id/3-1). In the final, he set up the opening goal against England [uefa](https://vertexaisearch.cloud.google.com/id/3-0).\\n\\nKey statistics from the tournament include [uefa](https://vertexaisearch.cloud.google.com/id/3-6) [uefa](https://vertexaisearch.cloud.google.com/id/3-7):\\n* 7 Matches played\\n* 507 Minutes played\\n* 1 Goal\\n* 4 Assists\\n\\nThese performances led to Yamal receiving the Euro 2024 Young Player of the Tournament award [uefa](https://vertexaisearch.cloud.google.com/id/3-0) [beinsports](https://vertexaisearch.cloud.google.com/id/3-1) [thehindu](https://vertexaisearch.cloud.google.com/id/3-2).\\n\"],\n",
" 'sources_gathered': [{'label': 'youtube',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-0',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGFcidniPKtBR-_QjSR1P1Oathq_0T9FTwfpCAWZxbXsroItHQU8zRcyOPDgMcvsWoD2fEnwYFKwanV18ep2_cyS5BlHF6-OFNsijWb-peAgsgLAVRiubekRnzMugsYtiWrhZyO3Q=='},\n",
" {'label': 'aljazeera',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-1',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEk7ApC7Y41UOrTWJ40wP2rsT0VDxqhqF-WJEI-FNKW7SNpR7LoA22sRQecS8hZNeZ_-62Vh7X75RmcmZUtnAOuQunrLAsETkkSx5l75dt9ESgTRkIURwtu4Pew7hn8yFz_LY_FJXUpmRfoWP7MWrDfPHcKrOpfmKqONj6mJcASNvAfCZ0p6qK3K4PvKWye6NyBMyYxWCuJig=='},\n",
" {'label': 'foxsports',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-2',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHh_4hBL0Giyuw_cyfT8m7tUSnMqBqH4Lis1CtJICPJNGGLhT6PADTIoUtrj3Rl5qcKNE9T6rzOmedAER_gxJOBDrCF8pnr9lUvhYvmDJxYCJzELkE5rTap4dx6FzOIKZKm1QBp5aHXzd_LCkSTV9ag7Q1A6_t8Vjdbskch6ZG3BoIfjYDQSPgRKDNFAAwt5J07cVFV5pDQzggmM7pxwsUz4drz'},\n",
" {'label': 'wikipedia',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-3',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGd9ZQky3X7RQLbTs6mY1i4Pg7ppcI5H_vtxpvQPiEyD8Qw0f7hjvn3QeoOeAVcCG_pEt5Aeu8ofWCgjwQy4_u6qU-NOOJsYPWOW94XcvtkmKiv46vbNkJF-Mb4OpvBztrDa28BfIdCGHdfF9o='},\n",
" {'label': 'youtube',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-4',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGZc-qDhRx_v3mPelXEfAVmWCpNTa_rzUKundc0pRc7PlTgppymao-_wO7O1oPaAhJYLcZkazIg8T5jA6t9OGgOxUd_Vl88BjouHsot0OK8TlM5hmPf4ECMWGeJthqVwndE3h4wdQ=='},\n",
" {'label': 'uefa',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-5',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXG1Lj9FnmuckfU0k1NC_ThQBZVxFCppp4tPl4FCcM3JZGF9aPvn9ZNFUo0fLfqw4Adt63Cdv8thcFSbsBRcf3rj1sz4LALJvrGfh6OayGo0KJ-UEKmKoOz8cxj5nIILCzKjFh2_0ZgTwrf1pkhhYbnWqj2E8hrVN4S5_sxvlCpLXPxjTsE4R0gYKXH_utqqm1NBkpl3p-C9v6kz-zm6V-JJoePAppIXFICF0DMYjOIBA9Mj0z4yO9Y9Tdgx2oaP'},\n",
" {'label': 'olympics',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-6',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFARil0pwjYQuFrDObawlDzu-eVtUPC4_nINjcXT-mlTL3MDgVPI83UB8gWS1rzGZkaMEmAUIeAzo2ihpMXUsWibzVzeAdQ7nUyqAOq0En87kpfuISduBuWI3__7yJw-vmdApD56-_G2ZhhZC4d_ll2iyNBaZHxxdNqXbb76mUiq99xV0hdoPEkp9RLk7T-uYYfTYXa8oYCXy2ysa9SZDa9hffEHrVe'},\n",
" {'label': 'aljazeera',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-1',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEk7ApC7Y41UOrTWJ40wP2rsT0VDxqhqF-WJEI-FNKW7SNpR7LoA22sRQecS8hZNeZ_-62Vh7X75RmcmZUtnAOuQunrLAsETkkSx5l75dt9ESgTRkIURwtu4Pew7hn8yFz_LY_FJXUpmRfoWP7MWrDfPHcKrOpfmKqONj6mJcASNvAfCZ0p6qK3K4PvKWye6NyBMyYxWCuJig=='},\n",
" {'label': 'foxsports',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-2',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHh_4hBL0Giyuw_cyfT8m7tUSnMqBqH4Lis1CtJICPJNGGLhT6PADTIoUtrj3Rl5qcKNE9T6rzOmedAER_gxJOBDrCF8pnr9lUvhYvmDJxYCJzELkE5rTap4dx6FzOIKZKm1QBp5aHXzd_LCkSTV9ag7Q1A6_t8Vjdbskch6ZG3BoIfjYDQSPgRKDNFAAwt5J07cVFV5pDQzggmM7pxwsUz4drz'},\n",
" {'label': 'youtube',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-0',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGFcidniPKtBR-_QjSR1P1Oathq_0T9FTwfpCAWZxbXsroItHQU8zRcyOPDgMcvsWoD2fEnwYFKwanV18ep2_cyS5BlHF6-OFNsijWb-peAgsgLAVRiubekRnzMugsYtiWrhZyO3Q=='},\n",
" {'label': 'aljazeera',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-1',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEk7ApC7Y41UOrTWJ40wP2rsT0VDxqhqF-WJEI-FNKW7SNpR7LoA22sRQecS8hZNeZ_-62Vh7X75RmcmZUtnAOuQunrLAsETkkSx5l75dt9ESgTRkIURwtu4Pew7hn8yFz_LY_FJXUpmRfoWP7MWrDfPHcKrOpfmKqONj6mJcASNvAfCZ0p6qK3K4PvKWye6NyBMyYxWCuJig=='},\n",
" {'label': 'foxsports',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-2',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHh_4hBL0Giyuw_cyfT8m7tUSnMqBqH4Lis1CtJICPJNGGLhT6PADTIoUtrj3Rl5qcKNE9T6rzOmedAER_gxJOBDrCF8pnr9lUvhYvmDJxYCJzELkE5rTap4dx6FzOIKZKm1QBp5aHXzd_LCkSTV9ag7Q1A6_t8Vjdbskch6ZG3BoIfjYDQSPgRKDNFAAwt5J07cVFV5pDQzggmM7pxwsUz4drz'},\n",
" {'label': 'olympics',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-6',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFARil0pwjYQuFrDObawlDzu-eVtUPC4_nINjcXT-mlTL3MDgVPI83UB8gWS1rzGZkaMEmAUIeAzo2ihpMXUsWibzVzeAdQ7nUyqAOq0En87kpfuISduBuWI3__7yJw-vmdApD56-_G2ZhhZC4d_ll2iyNBaZHxxdNqXbb76mUiq99xV0hdoPEkp9RLk7T-uYYfTYXa8oYCXy2ysa9SZDa9hffEHrVe'},\n",
" {'label': 'aljazeera',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-1',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEk7ApC7Y41UOrTWJ40wP2rsT0VDxqhqF-WJEI-FNKW7SNpR7LoA22sRQecS8hZNeZ_-62Vh7X75RmcmZUtnAOuQunrLAsETkkSx5l75dt9ESgTRkIURwtu4Pew7hn8yFz_LY_FJXUpmRfoWP7MWrDfPHcKrOpfmKqONj6mJcASNvAfCZ0p6qK3K4PvKWye6NyBMyYxWCuJig=='},\n",
" {'label': 'foxsports',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-2',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHh_4hBL0Giyuw_cyfT8m7tUSnMqBqH4Lis1CtJICPJNGGLhT6PADTIoUtrj3Rl5qcKNE9T6rzOmedAER_gxJOBDrCF8pnr9lUvhYvmDJxYCJzELkE5rTap4dx6FzOIKZKm1QBp5aHXzd_LCkSTV9ag7Q1A6_t8Vjdbskch6ZG3BoIfjYDQSPgRKDNFAAwt5J07cVFV5pDQzggmM7pxwsUz4drz'},\n",
" {'label': 'youtube',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-7',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFgwKo5lPes5M_GObnkYEzn3QYn1kpTQpx42ANaNqvNMgRsB1Xp2TIXI82SYTSYuLd9ysgKfmlJJy3lcLxrmNBg1R_Z37PCO9vbqIBIbw6DKqMif7pHdtDTS7FUq69c29hkYb_b5w=='},\n",
" {'label': 'wikipedia',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-3',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGd9ZQky3X7RQLbTs6mY1i4Pg7ppcI5H_vtxpvQPiEyD8Qw0f7hjvn3QeoOeAVcCG_pEt5Aeu8ofWCgjwQy4_u6qU-NOOJsYPWOW94XcvtkmKiv46vbNkJF-Mb4OpvBztrDa28BfIdCGHdfF9o='},\n",
" {'label': 'youtube',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-4',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGZc-qDhRx_v3mPelXEfAVmWCpNTa_rzUKundc0pRc7PlTgppymao-_wO7O1oPaAhJYLcZkazIg8T5jA6t9OGgOxUd_Vl88BjouHsot0OK8TlM5hmPf4ECMWGeJthqVwndE3h4wdQ=='},\n",
" {'label': 'uefa',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-5',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXG1Lj9FnmuckfU0k1NC_ThQBZVxFCppp4tPl4FCcM3JZGF9aPvn9ZNFUo0fLfqw4Adt63Cdv8thcFSbsBRcf3rj1sz4LALJvrGfh6OayGo0KJ-UEKmKoOz8cxj5nIILCzKjFh2_0ZgTwrf1pkhhYbnWqj2E8hrVN4S5_sxvlCpLXPxjTsE4R0gYKXH_utqqm1NBkpl3p-C9v6kz-zm6V-JJoePAppIXFICF0DMYjOIBA9Mj0z4yO9Y9Tdgx2oaP'},\n",
" {'label': 'olympics',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-6',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFARil0pwjYQuFrDObawlDzu-eVtUPC4_nINjcXT-mlTL3MDgVPI83UB8gWS1rzGZkaMEmAUIeAzo2ihpMXUsWibzVzeAdQ7nUyqAOq0En87kpfuISduBuWI3__7yJw-vmdApD56-_G2ZhhZC4d_ll2iyNBaZHxxdNqXbb76mUiq99xV0hdoPEkp9RLk7T-uYYfTYXa8oYCXy2ysa9SZDa9hffEHrVe'},\n",
" {'label': 'wikipedia',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-3',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGd9ZQky3X7RQLbTs6mY1i4Pg7ppcI5H_vtxpvQPiEyD8Qw0f7hjvn3QeoOeAVcCG_pEt5Aeu8ofWCgjwQy4_u6qU-NOOJsYPWOW94XcvtkmKiv46vbNkJF-Mb4OpvBztrDa28BfIdCGHdfF9o='},\n",
" {'label': 'olympics',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-6',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFARil0pwjYQuFrDObawlDzu-eVtUPC4_nINjcXT-mlTL3MDgVPI83UB8gWS1rzGZkaMEmAUIeAzo2ihpMXUsWibzVzeAdQ7nUyqAOq0En87kpfuISduBuWI3__7yJw-vmdApD56-_G2ZhhZC4d_ll2iyNBaZHxxdNqXbb76mUiq99xV0hdoPEkp9RLk7T-uYYfTYXa8oYCXy2ysa9SZDa9hffEHrVe'},\n",
" {'label': 'aljazeera',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-8',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFdu_dxqteuc9vM3oGH5WgEnFuOA6vlmbqof-iVRg2OviD2jzkp1jlCRsWkLfb64cK8TJ_g5jKKfZgmaMCk4LA-E2zjYGBfmsWiHdwfSg5Zv3VDMngM3HxT-VLjWYdBdpvpcBTj9VNRkqSCAjGVL9ar0VAOF0uRF6Z96LFz7G9KCSL50llqG7XLpbXmQTFIV4FUsffI8aQG9KKmIaZ1eGqeWQl2xaaRu6-Pwzqxizg8'},\n",
" {'label': 'wikipedia',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-3',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGd9ZQky3X7RQLbTs6mY1i4Pg7ppcI5H_vtxpvQPiEyD8Qw0f7hjvn3QeoOeAVcCG_pEt5Aeu8ofWCgjwQy4_u6qU-NOOJsYPWOW94XcvtkmKiv46vbNkJF-Mb4OpvBztrDa28BfIdCGHdfF9o='},\n",
" {'label': 'ndtv',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-9',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFRRH83ij2MgKWwrGFVWMaFDAT0_GKCFdwVIjaYn7DOoBlxXCGR-Y2RTw9AdKH8dYuhXxSxUTaZNXOBac2nknNZpdmwJiGIj51H6lRWREPUPOiKQkfVPJ0f4ubRSJBLm7_QcAkz4BwzJr3OM06jh-41TbNFZ9t6D7WrbzxmSs7x1O5DCnrPM2OeI6Nc0OhVT0AbeC6f_dTaBR9APlQFDrzIsvDIAn-W5eWuEohDs8w6np0eW65RuhQWrofdY8vFz-bsHgK0J3ew'},\n",
" {'label': 'uefa',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-5',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXG1Lj9FnmuckfU0k1NC_ThQBZVxFCppp4tPl4FCcM3JZGF9aPvn9ZNFUo0fLfqw4Adt63Cdv8thcFSbsBRcf3rj1sz4LALJvrGfh6OayGo0KJ-UEKmKoOz8cxj5nIILCzKjFh2_0ZgTwrf1pkhhYbnWqj2E8hrVN4S5_sxvlCpLXPxjTsE4R0gYKXH_utqqm1NBkpl3p-C9v6kz-zm6V-JJoePAppIXFICF0DMYjOIBA9Mj0z4yO9Y9Tdgx2oaP'},\n",
" {'label': 'wikipedia',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-10',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGbyTk6AGj4XMhW66noNoKqe8eCt9-HZUMs6FXsKVyXcMuoG1WLLhBHa9dITcU3zQFJqCzcxPmnu6rj3ZHmJp-n2xdffBtWYFl2pqxmLrEiZONNYLwleA-T8cnaL7gXWfFlJ2jnvB0='},\n",
" {'label': 'wikipedia',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-10',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGbyTk6AGj4XMhW66noNoKqe8eCt9-HZUMs6FXsKVyXcMuoG1WLLhBHa9dITcU3zQFJqCzcxPmnu6rj3ZHmJp-n2xdffBtWYFl2pqxmLrEiZONNYLwleA-T8cnaL7gXWfFlJ2jnvB0='},\n",
" {'label': 'transfermarkt',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-11',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFMeGs_GRmx0zI6E_xQZfylxykYcTT9MnZlM3ICoa41Pogn4H-1tLirtdPBOrumyI8s_C9i9cBukjUKHxlPfPP49aqTep7xFPgfe2uQFyG37Acsn9RtVv5VenCS5kfPLDQB7sGR-Tyj6wGyiptaTP1uhRnGgYg0u92BW5OH-MY='},\n",
" {'label': 'aljazeera',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-0',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFY5CRvcfjdkBz3h8Md_PscguyZ7LtYrxeHHP3eagcmIOnjaMyZbOHFqUAsa2cgkwvb26FZTvGiRgLKNLfiAsH1oP-5kGwnL6Ejhm4ZXhWGg0R3yE_8zkIKde4RgjIXlBvQW4kZ-LI5yhag-ESoh771z6hob8AigAVXT7WeWABMlQNfcbyG_UZIkqAs18U5e6to44ruNbSyDIyd5gobsVpEmdU256oVxa9d7co='},\n",
" {'label': 'coachesvoice',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-1',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHxgpkZWF64tZ8-iypkI2fiFi2cpsj4AFjZXkcYUzf5hSOWYb5etIbCoZd_L6zDJi6mWWisxAO6T5V4T8H7XiRow6dmVqXpSEIKhPSdG0HAQbQK74lwxeV_uXx9fSPllIKPOs2tFNRqTuHdJBNcwpcJp6MJbVLEskyhYnWlyOd9ouQv'},\n",
" {'label': 'aljazeera',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-2',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEV-g6Hxxcan5Xre1yYGM3BtP3fo9uF2zHQ9sVeK_4poD-aBN5CRvhz471beYCC26wdrjhtbiCvDT9dAnPI-ruyqJZhwB3vbKS5HCFb9tPn7Dkj99LpjLXqYyuzbFGsHCbr5SCHoMEhNg--dMU7xB5TiH8HeqKH8B4lk_h00dqhEVQFb05w5TuLtbX1UdXN6NDzHlFN_xyXzOU='},\n",
" {'label': 'wikipedia',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-3',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFNtaBQTFVnSbEW5Bbo8LUIs0h5cv4Pc4aS6Q8qG7jIMCsJPKy5_o6R8x7Z_xQ7AuDEAFlj2JY_AVV1YpwLqtXZxiAyvpfboH_VuMpo6MVbQAu2ZASSSD2slWaIqsUGkTEaPa2z2809z7UhEWUL'},\n",
" {'label': 'wikipedia',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-3',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFNtaBQTFVnSbEW5Bbo8LUIs0h5cv4Pc4aS6Q8qG7jIMCsJPKy5_o6R8x7Z_xQ7AuDEAFlj2JY_AVV1YpwLqtXZxiAyvpfboH_VuMpo6MVbQAu2ZASSSD2slWaIqsUGkTEaPa2z2809z7UhEWUL'},\n",
" {'label': 'wikipedia',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-3',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFNtaBQTFVnSbEW5Bbo8LUIs0h5cv4Pc4aS6Q8qG7jIMCsJPKy5_o6R8x7Z_xQ7AuDEAFlj2JY_AVV1YpwLqtXZxiAyvpfboH_VuMpo6MVbQAu2ZASSSD2slWaIqsUGkTEaPa2z2809z7UhEWUL'},\n",
" {'label': 'thehindu',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-4',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHtzvfIxJ0Lv3W7kqwlmY7CzFQxcbvXZqh4rRp3xBgV1vY01z4BRWA-GFu4INE8yFv9DE-eCib4cYnC-iv_PVgR8yPkBv8uRhI93Yf29MdbDoi_LGu46heOoxRLdMV58jlLI5nr-1sxKdfPutXE_rjuKehCswPGD-9RlbPI8NjyUQ69XAAOjDDhAN-MBxcIt_r3raV86AQfoo1UtYpUoUjhTGVcYBisvHRxv8-XjDjkr65nPm9vdaO7j28yCcokCCeGWv074_AGWeewDQWwczQM'},\n",
" {'label': 'newsbytesapp',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-5',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFIl5Xc3f44I1nYw_YrJqkByrRl20SiAopZqjfJIK6U62o27CrxLvxaJ4v1M7L5eOfTMMlBCHHYCUooPoG0aObaeRG3YxrcoFT7Xtd4KIrvCS6AWWRpOZasCW-sGtFA56DEDf-qbJ8lsXEJ4GQ386iGTdRkyK9EtJWw1mRpDu7dfPQ6Qy1hNIqTgTdo-3yq1WNmWEl8Xtnag0s='},\n",
" {'label': 'thehindu',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-4',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHtzvfIxJ0Lv3W7kqwlmY7CzFQxcbvXZqh4rRp3xBgV1vY01z4BRWA-GFu4INE8yFv9DE-eCib4cYnC-iv_PVgR8yPkBv8uRhI93Yf29MdbDoi_LGu46heOoxRLdMV58jlLI5nr-1sxKdfPutXE_rjuKehCswPGD-9RlbPI8NjyUQ69XAAOjDDhAN-MBxcIt_r3raV86AQfoo1UtYpUoUjhTGVcYBisvHRxv8-XjDjkr65nPm9vdaO7j28yCcokCCeGWv074_AGWeewDQWwczQM'},\n",
" {'label': 'newsbytesapp',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-5',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFIl5Xc3f44I1nYw_YrJqkByrRl20SiAopZqjfJIK6U62o27CrxLvxaJ4v1M7L5eOfTMMlBCHHYCUooPoG0aObaeRG3YxrcoFT7Xtd4KIrvCS6AWWRpOZasCW-sGtFA56DEDf-qbJ8lsXEJ4GQ386iGTdRkyK9EtJWw1mRpDu7dfPQ6Qy1hNIqTgTdo-3yq1WNmWEl8Xtnag0s='},\n",
" {'label': 'newsbytesapp',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-5',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFIl5Xc3f44I1nYw_YrJqkByrRl20SiAopZqjfJIK6U62o27CrxLvxaJ4v1M7L5eOfTMMlBCHHYCUooPoG0aObaeRG3YxrcoFT7Xtd4KIrvCS6AWWRpOZasCW-sGtFA56DEDf-qbJ8lsXEJ4GQ386iGTdRkyK9EtJWw1mRpDu7dfPQ6Qy1hNIqTgTdo-3yq1WNmWEl8Xtnag0s='},\n",
" {'label': 'aljazeera',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-2',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEV-g6Hxxcan5Xre1yYGM3BtP3fo9uF2zHQ9sVeK_4poD-aBN5CRvhz471beYCC26wdrjhtbiCvDT9dAnPI-ruyqJZhwB3vbKS5HCFb9tPn7Dkj99LpjLXqYyuzbFGsHCbr5SCHoMEhNg--dMU7xB5TiH8HeqKH8B4lk_h00dqhEVQFb05w5TuLtbX1UdXN6NDzHlFN_xyXzOU='},\n",
" {'label': 'sportsmole',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-6',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEVHkRwlOhx_8CZHVDe9XPE_nCs4XYVbx6aIl19aXGNLZxDpcsK5-hcYvMX_et8vasZtMNzmJNTtVd3Vne666vIkkRFUNJxVSBH9bMoGEFcPMcPoxFMUY5LV1YGZjm3n6xbDrkskawWb9MBS-zIIXiXZk7n6TluCji9k3ur3i5-ZhJcgPtAYU-KyfWRTdN0JY4bJt4tAl87Ba9ZInk9YuRlLlAFJ6flaKI-a4cZSXYDQeERhB742z_heWOhDchdvlPfoJaAuYSKKaABrbZQeZw='},\n",
" {'label': 'thehindu',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-4',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHtzvfIxJ0Lv3W7kqwlmY7CzFQxcbvXZqh4rRp3xBgV1vY01z4BRWA-GFu4INE8yFv9DE-eCib4cYnC-iv_PVgR8yPkBv8uRhI93Yf29MdbDoi_LGu46heOoxRLdMV58jlLI5nr-1sxKdfPutXE_rjuKehCswPGD-9RlbPI8NjyUQ69XAAOjDDhAN-MBxcIt_r3raV86AQfoo1UtYpUoUjhTGVcYBisvHRxv8-XjDjkr65nPm9vdaO7j28yCcokCCeGWv074_AGWeewDQWwczQM'},\n",
" {'label': 'thehindu',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-7',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXG7_kutwvl9NHZQl-k0Vpvj_1I7o8MCX8jNlw6rYXEOGSC9QcRvzaH9ycR3JQUjJLvUhUSeaR7hmJ-qPTgMSfw9US7uXQzTF3CJ-tXnIVI1UC8VRyJoW6fH2r-MRFd5EI-PS494grt4Xey1x7WsaZ_Q7tRcQgVX_EM0JxQK12s8yYAY3TIUpa1L5fZOmsi6ZKq-jrXYOmIV5OTu2AaleBeQE_Z-B10oU2qin2Q3T8w6LP2ispUlVEh54d5fWLcHlEtskrRHC8psjrarTgqn'},\n",
" {'label': 'thehindu',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-4',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHtzvfIxJ0Lv3W7kqwlmY7CzFQxcbvXZqh4rRp3xBgV1vY01z4BRWA-GFu4INE8yFv9DE-eCib4cYnC-iv_PVgR8yPkBv8uRhI93Yf29MdbDoi_LGu46heOoxRLdMV58jlLI5nr-1sxKdfPutXE_rjuKehCswPGD-9RlbPI8NjyUQ69XAAOjDDhAN-MBxcIt_r3raV86AQfoo1UtYpUoUjhTGVcYBisvHRxv8-XjDjkr65nPm9vdaO7j28yCcokCCeGWv074_AGWeewDQWwczQM'},\n",
" {'label': 'spanishprofootball',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-8',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFG8gCwweIne3MmZpbUnDq24EeYu1w6OpSNeS2U5DtRYUbqRVtIjCnFAOjlXy8XjD8MvbmoNIsRD9rdadJ7tWoyG3T5fj2QvlMdWjCXwpMs7W3D_49AT_d1vWRuu8i_-nAK0WHpo6Wo5abiRpwUyjtFX1rYGXujmwsodi5hUV9Q4Qd1ltJe2cuLhq2cPRU='},\n",
" {'label': 'thehindu',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-4',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHtzvfIxJ0Lv3W7kqwlmY7CzFQxcbvXZqh4rRp3xBgV1vY01z4BRWA-GFu4INE8yFv9DE-eCib4cYnC-iv_PVgR8yPkBv8uRhI93Yf29MdbDoi_LGu46heOoxRLdMV58jlLI5nr-1sxKdfPutXE_rjuKehCswPGD-9RlbPI8NjyUQ69XAAOjDDhAN-MBxcIt_r3raV86AQfoo1UtYpUoUjhTGVcYBisvHRxv8-XjDjkr65nPm9vdaO7j28yCcokCCeGWv074_AGWeewDQWwczQM'},\n",
" {'label': 'thehindu',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-4',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHtzvfIxJ0Lv3W7kqwlmY7CzFQxcbvXZqh4rRp3xBgV1vY01z4BRWA-GFu4INE8yFv9DE-eCib4cYnC-iv_PVgR8yPkBv8uRhI93Yf29MdbDoi_LGu46heOoxRLdMV58jlLI5nr-1sxKdfPutXE_rjuKehCswPGD-9RlbPI8NjyUQ69XAAOjDDhAN-MBxcIt_r3raV86AQfoo1UtYpUoUjhTGVcYBisvHRxv8-XjDjkr65nPm9vdaO7j28yCcokCCeGWv074_AGWeewDQWwczQM'},\n",
" {'label': 'newsbytesapp',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-5',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFIl5Xc3f44I1nYw_YrJqkByrRl20SiAopZqjfJIK6U62o27CrxLvxaJ4v1M7L5eOfTMMlBCHHYCUooPoG0aObaeRG3YxrcoFT7Xtd4KIrvCS6AWWRpOZasCW-sGtFA56DEDf-qbJ8lsXEJ4GQ386iGTdRkyK9EtJWw1mRpDu7dfPQ6Qy1hNIqTgTdo-3yq1WNmWEl8Xtnag0s='},\n",
" {'label': 'thehindu',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-4',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHtzvfIxJ0Lv3W7kqwlmY7CzFQxcbvXZqh4rRp3xBgV1vY01z4BRWA-GFu4INE8yFv9DE-eCib4cYnC-iv_PVgR8yPkBv8uRhI93Yf29MdbDoi_LGu46heOoxRLdMV58jlLI5nr-1sxKdfPutXE_rjuKehCswPGD-9RlbPI8NjyUQ69XAAOjDDhAN-MBxcIt_r3raV86AQfoo1UtYpUoUjhTGVcYBisvHRxv8-XjDjkr65nPm9vdaO7j28yCcokCCeGWv074_AGWeewDQWwczQM'},\n",
" {'label': 'newsbytesapp',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-5',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFIl5Xc3f44I1nYw_YrJqkByrRl20SiAopZqjfJIK6U62o27CrxLvxaJ4v1M7L5eOfTMMlBCHHYCUooPoG0aObaeRG3YxrcoFT7Xtd4KIrvCS6AWWRpOZasCW-sGtFA56DEDf-qbJ8lsXEJ4GQ386iGTdRkyK9EtJWw1mRpDu7dfPQ6Qy1hNIqTgTdo-3yq1WNmWEl8Xtnag0s='},\n",
" {'label': 'newsbytesapp',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-5',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFIl5Xc3f44I1nYw_YrJqkByrRl20SiAopZqjfJIK6U62o27CrxLvxaJ4v1M7L5eOfTMMlBCHHYCUooPoG0aObaeRG3YxrcoFT7Xtd4KIrvCS6AWWRpOZasCW-sGtFA56DEDf-qbJ8lsXEJ4GQ386iGTdRkyK9EtJWw1mRpDu7dfPQ6Qy1hNIqTgTdo-3yq1WNmWEl8Xtnag0s='},\n",
" {'label': 'totalfootballanalysis',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-9',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGQ6VynwIGq-L7FKFu-L5vh_TzCWZHXL9rXmzI0uuR1Qexwi1jRKOzvfthF3hl-KGOQhdsEC67FoNIH5ojbVkEVCxdDkX73E9DZUv8Vz_GRld1NHm0gm0i7n-KaZ5w72dfptRLWKyKnfY6UZawwFX3OtwTfYQzHd32wv1s4sk0PIUNOj-FdhnWxaYO-PJSC_aZcwpuVrmEgOqXy0Xk='},\n",
" {'label': 'totalfootballanalysis',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-10',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHYrmUdaz_yH2TIUYp54IQ9PxJYBikelavTVFZ5gy2Up1Kaavkf0zeM14L7mTiuPxGEHjaQjn8mLt3I1HdZH34VrBJn6sZ07KzPCX9Bo7gkM44oroevlhaXZtFG65maD7igABOGLBJjZE0Hg17i3EIGTMVfE-OEn0NN53EhY1pLQObHKWJogrtjbLil0XJOV9Ym5_La7JuWQpKo7IiuPlH-w7N_vJHgTDZOxJMY'},\n",
" {'label': 'spanishprofootball',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-8',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFG8gCwweIne3MmZpbUnDq24EeYu1w6OpSNeS2U5DtRYUbqRVtIjCnFAOjlXy8XjD8MvbmoNIsRD9rdadJ7tWoyG3T5fj2QvlMdWjCXwpMs7W3D_49AT_d1vWRuu8i_-nAK0WHpo6Wo5abiRpwUyjtFX1rYGXujmwsodi5hUV9Q4Qd1ltJe2cuLhq2cPRU='},\n",
" {'label': 'spanishprofootball',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-8',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFG8gCwweIne3MmZpbUnDq24EeYu1w6OpSNeS2U5DtRYUbqRVtIjCnFAOjlXy8XjD8MvbmoNIsRD9rdadJ7tWoyG3T5fj2QvlMdWjCXwpMs7W3D_49AT_d1vWRuu8i_-nAK0WHpo6Wo5abiRpwUyjtFX1rYGXujmwsodi5hUV9Q4Qd1ltJe2cuLhq2cPRU='},\n",
" {'label': 'spanishprofootball',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-8',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFG8gCwweIne3MmZpbUnDq24EeYu1w6OpSNeS2U5DtRYUbqRVtIjCnFAOjlXy8XjD8MvbmoNIsRD9rdadJ7tWoyG3T5fj2QvlMdWjCXwpMs7W3D_49AT_d1vWRuu8i_-nAK0WHpo6Wo5abiRpwUyjtFX1rYGXujmwsodi5hUV9Q4Qd1ltJe2cuLhq2cPRU='},\n",
" {'label': 'thehindu',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-4',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHtzvfIxJ0Lv3W7kqwlmY7CzFQxcbvXZqh4rRp3xBgV1vY01z4BRWA-GFu4INE8yFv9DE-eCib4cYnC-iv_PVgR8yPkBv8uRhI93Yf29MdbDoi_LGu46heOoxRLdMV58jlLI5nr-1sxKdfPutXE_rjuKehCswPGD-9RlbPI8NjyUQ69XAAOjDDhAN-MBxcIt_r3raV86AQfoo1UtYpUoUjhTGVcYBisvHRxv8-XjDjkr65nPm9vdaO7j28yCcokCCeGWv074_AGWeewDQWwczQM'},\n",
" {'label': 'newsbytesapp',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-5',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFIl5Xc3f44I1nYw_YrJqkByrRl20SiAopZqjfJIK6U62o27CrxLvxaJ4v1M7L5eOfTMMlBCHHYCUooPoG0aObaeRG3YxrcoFT7Xtd4KIrvCS6AWWRpOZasCW-sGtFA56DEDf-qbJ8lsXEJ4GQ386iGTdRkyK9EtJWw1mRpDu7dfPQ6Qy1hNIqTgTdo-3yq1WNmWEl8Xtnag0s='},\n",
" {'label': 'newsbytesapp',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-5',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFIl5Xc3f44I1nYw_YrJqkByrRl20SiAopZqjfJIK6U62o27CrxLvxaJ4v1M7L5eOfTMMlBCHHYCUooPoG0aObaeRG3YxrcoFT7Xtd4KIrvCS6AWWRpOZasCW-sGtFA56DEDf-qbJ8lsXEJ4GQ386iGTdRkyK9EtJWw1mRpDu7dfPQ6Qy1hNIqTgTdo-3yq1WNmWEl8Xtnag0s='},\n",
" {'label': 'thehindu',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-4',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHtzvfIxJ0Lv3W7kqwlmY7CzFQxcbvXZqh4rRp3xBgV1vY01z4BRWA-GFu4INE8yFv9DE-eCib4cYnC-iv_PVgR8yPkBv8uRhI93Yf29MdbDoi_LGu46heOoxRLdMV58jlLI5nr-1sxKdfPutXE_rjuKehCswPGD-9RlbPI8NjyUQ69XAAOjDDhAN-MBxcIt_r3raV86AQfoo1UtYpUoUjhTGVcYBisvHRxv8-XjDjkr65nPm9vdaO7j28yCcokCCeGWv074_AGWeewDQWwczQM'},\n",
" {'label': 'thehindu',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-7',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXG7_kutwvl9NHZQl-k0Vpvj_1I7o8MCX8jNlw6rYXEOGSC9QcRvzaH9ycR3JQUjJLvUhUSeaR7hmJ-qPTgMSfw9US7uXQzTF3CJ-tXnIVI1UC8VRyJoW6fH2r-MRFd5EI-PS494grt4Xey1x7WsaZ_Q7tRcQgVX_EM0JxQK12s8yYAY3TIUpa1L5fZOmsi6ZKq-jrXYOmIV5OTu2AaleBeQE_Z-B10oU2qin2Q3T8w6LP2ispUlVEh54d5fWLcHlEtskrRHC8psjrarTgqn'},\n",
" {'label': 'sportsmole',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-6',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEVHkRwlOhx_8CZHVDe9XPE_nCs4XYVbx6aIl19aXGNLZxDpcsK5-hcYvMX_et8vasZtMNzmJNTtVd3Vne666vIkkRFUNJxVSBH9bMoGEFcPMcPoxFMUY5LV1YGZjm3n6xbDrkskawWb9MBS-zIIXiXZk7n6TluCji9k3ur3i5-ZhJcgPtAYU-KyfWRTdN0JY4bJt4tAl87Ba9ZInk9YuRlLlAFJ6flaKI-a4cZSXYDQeERhB742z_heWOhDchdvlPfoJaAuYSKKaABrbZQeZw='},\n",
" {'label': 'wikipedia',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-3',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFNtaBQTFVnSbEW5Bbo8LUIs0h5cv4Pc4aS6Q8qG7jIMCsJPKy5_o6R8x7Z_xQ7AuDEAFlj2JY_AVV1YpwLqtXZxiAyvpfboH_VuMpo6MVbQAu2ZASSSD2slWaIqsUGkTEaPa2z2809z7UhEWUL'},\n",
" {'label': 'thehindu',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-4',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHtzvfIxJ0Lv3W7kqwlmY7CzFQxcbvXZqh4rRp3xBgV1vY01z4BRWA-GFu4INE8yFv9DE-eCib4cYnC-iv_PVgR8yPkBv8uRhI93Yf29MdbDoi_LGu46heOoxRLdMV58jlLI5nr-1sxKdfPutXE_rjuKehCswPGD-9RlbPI8NjyUQ69XAAOjDDhAN-MBxcIt_r3raV86AQfoo1UtYpUoUjhTGVcYBisvHRxv8-XjDjkr65nPm9vdaO7j28yCcokCCeGWv074_AGWeewDQWwczQM'},\n",
" {'label': 'thehindu',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-7',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXG7_kutwvl9NHZQl-k0Vpvj_1I7o8MCX8jNlw6rYXEOGSC9QcRvzaH9ycR3JQUjJLvUhUSeaR7hmJ-qPTgMSfw9US7uXQzTF3CJ-tXnIVI1UC8VRyJoW6fH2r-MRFd5EI-PS494grt4Xey1x7WsaZ_Q7tRcQgVX_EM0JxQK12s8yYAY3TIUpa1L5fZOmsi6ZKq-jrXYOmIV5OTu2AaleBeQE_Z-B10oU2qin2Q3T8w6LP2ispUlVEh54d5fWLcHlEtskrRHC8psjrarTgqn'},\n",
" {'label': 'thehindu',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-4',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHtzvfIxJ0Lv3W7kqwlmY7CzFQxcbvXZqh4rRp3xBgV1vY01z4BRWA-GFu4INE8yFv9DE-eCib4cYnC-iv_PVgR8yPkBv8uRhI93Yf29MdbDoi_LGu46heOoxRLdMV58jlLI5nr-1sxKdfPutXE_rjuKehCswPGD-9RlbPI8NjyUQ69XAAOjDDhAN-MBxcIt_r3raV86AQfoo1UtYpUoUjhTGVcYBisvHRxv8-XjDjkr65nPm9vdaO7j28yCcokCCeGWv074_AGWeewDQWwczQM'},\n",
" {'label': 'thehindu',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-4',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHtzvfIxJ0Lv3W7kqwlmY7CzFQxcbvXZqh4rRp3xBgV1vY01z4BRWA-GFu4INE8yFv9DE-eCib4cYnC-iv_PVgR8yPkBv8uRhI93Yf29MdbDoi_LGu46heOoxRLdMV58jlLI5nr-1sxKdfPutXE_rjuKehCswPGD-9RlbPI8NjyUQ69XAAOjDDhAN-MBxcIt_r3raV86AQfoo1UtYpUoUjhTGVcYBisvHRxv8-XjDjkr65nPm9vdaO7j28yCcokCCeGWv074_AGWeewDQWwczQM'},\n",
" {'label': 'wikipedia',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-3',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFNtaBQTFVnSbEW5Bbo8LUIs0h5cv4Pc4aS6Q8qG7jIMCsJPKy5_o6R8x7Z_xQ7AuDEAFlj2JY_AVV1YpwLqtXZxiAyvpfboH_VuMpo6MVbQAu2ZASSSD2slWaIqsUGkTEaPa2z2809z7UhEWUL'},\n",
" {'label': 'coachesvoice',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-1',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHxgpkZWF64tZ8-iypkI2fiFi2cpsj4AFjZXkcYUzf5hSOWYb5etIbCoZd_L6zDJi6mWWisxAO6T5V4T8H7XiRow6dmVqXpSEIKhPSdG0HAQbQK74lwxeV_uXx9fSPllIKPOs2tFNRqTuHdJBNcwpcJp6MJbVLEskyhYnWlyOd9ouQv'},\n",
" {'label': 'bet9ja',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/2-0',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFgj0MP_IEmC842xTfmMPnbybBGYTUb_wEpwJ58keX5x_qPfUmC7Zz0o6IQeQ8TEqoRpv-Uq6oOqfbazu_aP0fMhP7UrSln6rB4SRvCRC327tM1LNaXpiXN-h6xlg0TN_-AWQORV4PSH7G5u2qD_NaNEWkz_oaEHxj22-qOam52fwRvqISOdoFDNTptlM6t0BbhcA=='},\n",
" {'label': 'europeanchampionship2024',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/2-1',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGLP8ZiV1gSErFyEW_mBaeUabOdyppbZMUHyMPTq_nC68lIlF28o4vXtvlsLYq7C-ANzy6iwTpWA4ri2fUKevBCGLRotUVZjLX6Au_hnO-mbPGp_Z7nyomkjYhu2iLoPXbmTS8KmWJr8ZAul7j0XQA-S621HaOSBDk0-XBGiKgISgeQb7Tuc-OGj_NMlPQkzK2y4qrs_TBcPgfh5w=='},\n",
" {'label': 'bet9ja',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/2-0',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFgj0MP_IEmC842xTfmMPnbybBGYTUb_wEpwJ58keX5x_qPfUmC7Zz0o6IQeQ8TEqoRpv-Uq6oOqfbazu_aP0fMhP7UrSln6rB4SRvCRC327tM1LNaXpiXN-h6xlg0TN_-AWQORV4PSH7G5u2qD_NaNEWkz_oaEHxj22-qOam52fwRvqISOdoFDNTptlM6t0BbhcA=='},\n",
" {'label': 'europeanchampionship2024',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/2-1',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGLP8ZiV1gSErFyEW_mBaeUabOdyppbZMUHyMPTq_nC68lIlF28o4vXtvlsLYq7C-ANzy6iwTpWA4ri2fUKevBCGLRotUVZjLX6Au_hnO-mbPGp_Z7nyomkjYhu2iLoPXbmTS8KmWJr8ZAul7j0XQA-S621HaOSBDk0-XBGiKgISgeQb7Tuc-OGj_NMlPQkzK2y4qrs_TBcPgfh5w=='},\n",
" {'label': 'indiatimes',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/2-2',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXESZAkHCLh4VxzVLM3prMQrm-sk5x27L8Z70Q4PKkte0vxhTZVZGCY1s5VfC7u5gECHBavdf1DHRCmh77mAaONSIJ78dcaGelojd2Cd5NuJcyQD8juxOERO1zD147S62xcwKy0GZ9Pb64Yj9cPLEx3fDJvTEm4sn013e_e13dTXUQd4m2yHuO72CfsZSbEq-wVsP47O20GMQXLlZov73MCd1uS1eMq9I5cj1QjiIOjiTC484inoCaShm3LTkXA-Jk5L8GvL'},\n",
" {'label': 'bet9ja',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/2-0',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFgj0MP_IEmC842xTfmMPnbybBGYTUb_wEpwJ58keX5x_qPfUmC7Zz0o6IQeQ8TEqoRpv-Uq6oOqfbazu_aP0fMhP7UrSln6rB4SRvCRC327tM1LNaXpiXN-h6xlg0TN_-AWQORV4PSH7G5u2qD_NaNEWkz_oaEHxj22-qOam52fwRvqISOdoFDNTptlM6t0BbhcA=='},\n",
" {'label': 'uefa',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/2-3',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFG1hC1YkRcN1pqrLp05aZRwvU2gEv7qauPowe-Co8wgi3HfVrNby2N2i7C3--nu8eYku9ak1DQeH8zJX6XRVKe8psOQ02y3nY5TYcHp-Uk3aZay-sGe4bQZJxVKeF5NS2vtG-h09y3TD_5Aox3V9Yh0z1MYKTBE3Q='},\n",
" {'label': 'mancity',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/2-4',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFs-OAM9wd4bVstgUzRYVeAqGBbUckmq77-BWTs9IkGYZc-WwzHbZ1khSV8T91YQpZkd8c6vZTke-Wgkf4O1SdhMLwYXVj3SsWViVDOT-eeZPBI5v1BuE1Wb0wg9XGzxOl66-faN_8zKvvdm-KEzx5OfL7ytu0i-cG9AzKpZPgi5HNCuLw8PwcbPPxXB_QE5VSuCC5uYGMJ'},\n",
" {'label': 'uefa',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/2-5',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHlzgFESCAkLrjGTw5ZRNnr68tC-GrAg3iL61UJu3ZT8SJX4HYaFE6qOIR8iNpXpJDUMDnTwIpG6IFrT6NPqAbQCSoj_GIPC-eBrrVrUqA8IdzvncYpRAubOVFFkNVBZGbY64I2FiF6wA-biL0bKFD02adziHempLuyjM5YvnOFDR1r0A=='},\n",
" {'label': 'mancity',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/2-4',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFs-OAM9wd4bVstgUzRYVeAqGBbUckmq77-BWTs9IkGYZc-WwzHbZ1khSV8T91YQpZkd8c6vZTke-Wgkf4O1SdhMLwYXVj3SsWViVDOT-eeZPBI5v1BuE1Wb0wg9XGzxOl66-faN_8zKvvdm-KEzx5OfL7ytu0i-cG9AzKpZPgi5HNCuLw8PwcbPPxXB_QE5VSuCC5uYGMJ'},\n",
" {'label': 'mancity',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/2-4',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFs-OAM9wd4bVstgUzRYVeAqGBbUckmq77-BWTs9IkGYZc-WwzHbZ1khSV8T91YQpZkd8c6vZTke-Wgkf4O1SdhMLwYXVj3SsWViVDOT-eeZPBI5v1BuE1Wb0wg9XGzxOl66-faN_8zKvvdm-KEzx5OfL7ytu0i-cG9AzKpZPgi5HNCuLw8PwcbPPxXB_QE5VSuCC5uYGMJ'},\n",
" {'label': 'europeanchampionship2024',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/2-1',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGLP8ZiV1gSErFyEW_mBaeUabOdyppbZMUHyMPTq_nC68lIlF28o4vXtvlsLYq7C-ANzy6iwTpWA4ri2fUKevBCGLRotUVZjLX6Au_hnO-mbPGp_Z7nyomkjYhu2iLoPXbmTS8KmWJr8ZAul7j0XQA-S621HaOSBDk0-XBGiKgISgeQb7Tuc-OGj_NMlPQkzK2y4qrs_TBcPgfh5w=='},\n",
" {'label': 'bet9ja',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/2-0',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFgj0MP_IEmC842xTfmMPnbybBGYTUb_wEpwJ58keX5x_qPfUmC7Zz0o6IQeQ8TEqoRpv-Uq6oOqfbazu_aP0fMhP7UrSln6rB4SRvCRC327tM1LNaXpiXN-h6xlg0TN_-AWQORV4PSH7G5u2qD_NaNEWkz_oaEHxj22-qOam52fwRvqISOdoFDNTptlM6t0BbhcA=='},\n",
" {'label': 'upthrust',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/2-6',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGAjXBUZABbe0t7dJcEjK-t1A0Gqoyhqd7tS8SrIRQNiNGFC_prv2xazEc-9Xd7vH1V9PgjWB5k8TBWcxtRc8Z2ZHRS3i6cwhdKxLswfDAFFuamfuITm699F648K4tmBYZABT6neMReI4c4sINJAEKqrn6hNzZZjtTt44X78i2dTIOQe74qvl9ofmwm6Q=='},\n",
" {'label': 'upthrust',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/2-6',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGAjXBUZABbe0t7dJcEjK-t1A0Gqoyhqd7tS8SrIRQNiNGFC_prv2xazEc-9Xd7vH1V9PgjWB5k8TBWcxtRc8Z2ZHRS3i6cwhdKxLswfDAFFuamfuITm699F648K4tmBYZABT6neMReI4c4sINJAEKqrn6hNzZZjtTt44X78i2dTIOQe74qvl9ofmwm6Q=='},\n",
" {'label': 'upthrust',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/2-6',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGAjXBUZABbe0t7dJcEjK-t1A0Gqoyhqd7tS8SrIRQNiNGFC_prv2xazEc-9Xd7vH1V9PgjWB5k8TBWcxtRc8Z2ZHRS3i6cwhdKxLswfDAFFuamfuITm699F648K4tmBYZABT6neMReI4c4sINJAEKqrn6hNzZZjtTt44X78i2dTIOQe74qvl9ofmwm6Q=='},\n",
" {'label': 'upthrust',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/2-6',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGAjXBUZABbe0t7dJcEjK-t1A0Gqoyhqd7tS8SrIRQNiNGFC_prv2xazEc-9Xd7vH1V9PgjWB5k8TBWcxtRc8Z2ZHRS3i6cwhdKxLswfDAFFuamfuITm699F648K4tmBYZABT6neMReI4c4sINJAEKqrn6hNzZZjtTt44X78i2dTIOQe74qvl9ofmwm6Q=='},\n",
" {'label': 'upthrust',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/2-6',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGAjXBUZABbe0t7dJcEjK-t1A0Gqoyhqd7tS8SrIRQNiNGFC_prv2xazEc-9Xd7vH1V9PgjWB5k8TBWcxtRc8Z2ZHRS3i6cwhdKxLswfDAFFuamfuITm699F648K4tmBYZABT6neMReI4c4sINJAEKqrn6hNzZZjtTt44X78i2dTIOQe74qvl9ofmwm6Q=='},\n",
" {'label': 'upthrust',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/2-6',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGAjXBUZABbe0t7dJcEjK-t1A0Gqoyhqd7tS8SrIRQNiNGFC_prv2xazEc-9Xd7vH1V9PgjWB5k8TBWcxtRc8Z2ZHRS3i6cwhdKxLswfDAFFuamfuITm699F648K4tmBYZABT6neMReI4c4sINJAEKqrn6hNzZZjtTt44X78i2dTIOQe74qvl9ofmwm6Q=='},\n",
" {'label': 'upthrust',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/2-6',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGAjXBUZABbe0t7dJcEjK-t1A0Gqoyhqd7tS8SrIRQNiNGFC_prv2xazEc-9Xd7vH1V9PgjWB5k8TBWcxtRc8Z2ZHRS3i6cwhdKxLswfDAFFuamfuITm699F648K4tmBYZABT6neMReI4c4sINJAEKqrn6hNzZZjtTt44X78i2dTIOQe74qvl9ofmwm6Q=='},\n",
" {'label': 'indiatimes',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/2-2',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXESZAkHCLh4VxzVLM3prMQrm-sk5x27L8Z70Q4PKkte0vxhTZVZGCY1s5VfC7u5gECHBavdf1DHRCmh77mAaONSIJ78dcaGelojd2Cd5NuJcyQD8juxOERO1zD147S62xcwKy0GZ9Pb64Yj9cPLEx3fDJvTEm4sn013e_e13dTXUQd4m2yHuO72CfsZSbEq-wVsP47O20GMQXLlZov73MCd1uS1eMq9I5cj1QjiIOjiTC484inoCaShm3LTkXA-Jk5L8GvL'},\n",
" {'label': 'bet9ja',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/2-0',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFgj0MP_IEmC842xTfmMPnbybBGYTUb_wEpwJ58keX5x_qPfUmC7Zz0o6IQeQ8TEqoRpv-Uq6oOqfbazu_aP0fMhP7UrSln6rB4SRvCRC327tM1LNaXpiXN-h6xlg0TN_-AWQORV4PSH7G5u2qD_NaNEWkz_oaEHxj22-qOam52fwRvqISOdoFDNTptlM6t0BbhcA=='},\n",
" {'label': 'uefa',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/2-7',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHcZMzOblU6pNw1gc0QlnRNhCK5VfMY4bW1wPz51w6-AyhvvZnyXcfFxJd4JPdnEfEPD0GB5vHPql6jFppUeKKRysRvwpwaTwaDyFAkvRGab-UAPOOUuK72HsYGrlGVEUHLO6mkzthFd_p8HUaj_JJqlhIaQOuosrZ2y7vf9ouEvd10Uh-rBOqmPRclpkcg3o3WpHhgBY5xNUPEw22V45KhXrqiQhUn5ZSKw3TcsGjla-vA'},\n",
" {'label': 'uefa',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/3-0',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGKGygrv0aVjWa7JUdwqtuttcPxVIiVFb2_Mxv32q-4AyOVwd8oMKLXq6sl2kw4A37lHLmUUQYqVfDMkX3DLXr4or1Xpx1lnOpIUanPjOtrr2Hk6tPPc0308hdE0xJ5CClC220Tz30xD6538_DOvrVWqfA7pV7x651519Zz37wgqYhN00Ah3LX4QZnW981_-SM8tjVSLDXutPphZBXXmMehNgUynvNd2IiGB9UtkLyGeWINIqR2F7lejStuXJ8U2Q=='},\n",
" {'label': 'beinsports',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/3-1',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXExRli0zGmQZlemPPItRH3qShabB-QVHrgUAECeXIs3GUKgd2oIHd45-ULY--TosnkRkiM-XHqZlPxeQlOV6Ktgxb-L5r9Hhf8M-nQS_T0N7NK0BeynreRZtFivuKzwwOByq6uALzoVtombjsREMmsPG7s07CMlMrQjyJCVX8McNdnGC7-mdlHEjdfXN4sgi-YGxdxCdAxaHUaMQxPL0GUUmqDzMMpzVC_lRnrYfuk17UhXI9QhsEi3TMeuUgHu3kl16g1mHA=='},\n",
" {'label': 'thehindu',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/3-2',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEAlCtejIOwzHPUOAXi7oLu469wYzGUJN86oxtrB6YCAHKAocfkxog6XZeXOUjAl9MTY2_jU5igYEOpyy5RZV2jhxGHtahvQGi8Bq0XkJmaFvludGqwpuBn-vFf-MR3As1CXu9GZNh0TW5f3eLPgvDjB6N3IoYaGhGT8BUiqSyZS6k41T-vL9h6fEFMoOFUYhG2S0AfuVZDuyF2nJHJP1WVWZS42csWXEJUDxqhYjyzmx33HaCxKk0Rbe3_Ovc_Kgdagw=='},\n",
" {'label': 'uefa',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/3-0',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGKGygrv0aVjWa7JUdwqtuttcPxVIiVFb2_Mxv32q-4AyOVwd8oMKLXq6sl2kw4A37lHLmUUQYqVfDMkX3DLXr4or1Xpx1lnOpIUanPjOtrr2Hk6tPPc0308hdE0xJ5CClC220Tz30xD6538_DOvrVWqfA7pV7x651519Zz37wgqYhN00Ah3LX4QZnW981_-SM8tjVSLDXutPphZBXXmMehNgUynvNd2IiGB9UtkLyGeWINIqR2F7lejStuXJ8U2Q=='},\n",
" {'label': 'uefa',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/3-0',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGKGygrv0aVjWa7JUdwqtuttcPxVIiVFb2_Mxv32q-4AyOVwd8oMKLXq6sl2kw4A37lHLmUUQYqVfDMkX3DLXr4or1Xpx1lnOpIUanPjOtrr2Hk6tPPc0308hdE0xJ5CClC220Tz30xD6538_DOvrVWqfA7pV7x651519Zz37wgqYhN00Ah3LX4QZnW981_-SM8tjVSLDXutPphZBXXmMehNgUynvNd2IiGB9UtkLyGeWINIqR2F7lejStuXJ8U2Q=='},\n",
" {'label': 'uefa',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/3-3',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGIh15GjQTH5sloOsTsyL7xu4UxiL1iUhCOmOYuQn2I3oTzOmC8I6vpqG7puUq20dPwFWNyGzUT1m4eiDf3XTrvfO-BRbRz80it26jo1H0Wq6Dr8jI2xYQbW8suUGcHTE6aT6FUa57v2oHiBP2yTBe4FyTP3w-us4RhdAgxy32VvGJhczpHTp36FWBtxK-ESh5KTcPHflNroQkKP0rE17DLYrMfoQVNGf41jeTM2YCvoSeymtFHc-wvySulmtIFlQ=='},\n",
" {'label': 'wikipedia',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/3-4',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGEIQE8ljNdOpLHYgNFDjPekNZfDVP_W7aAbZgzTSgSraVNbalzctN2llZ3do9v9r7sRqxOXioKpebZrVCBnux58qbMLK8wpc4MmOKDRG3bAD8hwE7xMl_InBIfHuIMbuZ_twEC'},\n",
" {'label': 'uefa',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/3-0',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGKGygrv0aVjWa7JUdwqtuttcPxVIiVFb2_Mxv32q-4AyOVwd8oMKLXq6sl2kw4A37lHLmUUQYqVfDMkX3DLXr4or1Xpx1lnOpIUanPjOtrr2Hk6tPPc0308hdE0xJ5CClC220Tz30xD6538_DOvrVWqfA7pV7x651519Zz37wgqYhN00Ah3LX4QZnW981_-SM8tjVSLDXutPphZBXXmMehNgUynvNd2IiGB9UtkLyGeWINIqR2F7lejStuXJ8U2Q=='},\n",
" {'label': 'uefa',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/3-3',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGIh15GjQTH5sloOsTsyL7xu4UxiL1iUhCOmOYuQn2I3oTzOmC8I6vpqG7puUq20dPwFWNyGzUT1m4eiDf3XTrvfO-BRbRz80it26jo1H0Wq6Dr8jI2xYQbW8suUGcHTE6aT6FUa57v2oHiBP2yTBe4FyTP3w-us4RhdAgxy32VvGJhczpHTp36FWBtxK-ESh5KTcPHflNroQkKP0rE17DLYrMfoQVNGf41jeTM2YCvoSeymtFHc-wvySulmtIFlQ=='},\n",
" {'label': 'beinsports',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/3-1',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXExRli0zGmQZlemPPItRH3qShabB-QVHrgUAECeXIs3GUKgd2oIHd45-ULY--TosnkRkiM-XHqZlPxeQlOV6Ktgxb-L5r9Hhf8M-nQS_T0N7NK0BeynreRZtFivuKzwwOByq6uALzoVtombjsREMmsPG7s07CMlMrQjyJCVX8McNdnGC7-mdlHEjdfXN4sgi-YGxdxCdAxaHUaMQxPL0GUUmqDzMMpzVC_lRnrYfuk17UhXI9QhsEi3TMeuUgHu3kl16g1mHA=='},\n",
" {'label': 'thehindu',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/3-2',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEAlCtejIOwzHPUOAXi7oLu469wYzGUJN86oxtrB6YCAHKAocfkxog6XZeXOUjAl9MTY2_jU5igYEOpyy5RZV2jhxGHtahvQGi8Bq0XkJmaFvludGqwpuBn-vFf-MR3As1CXu9GZNh0TW5f3eLPgvDjB6N3IoYaGhGT8BUiqSyZS6k41T-vL9h6fEFMoOFUYhG2S0AfuVZDuyF2nJHJP1WVWZS42csWXEJUDxqhYjyzmx33HaCxKk0Rbe3_Ovc_Kgdagw=='},\n",
" {'label': 'wikipedia',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/3-4',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGEIQE8ljNdOpLHYgNFDjPekNZfDVP_W7aAbZgzTSgSraVNbalzctN2llZ3do9v9r7sRqxOXioKpebZrVCBnux58qbMLK8wpc4MmOKDRG3bAD8hwE7xMl_InBIfHuIMbuZ_twEC'},\n",
" {'label': 'thehindu',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/3-5',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEMtKj653gIrpD8CpAaGVkViYwCyEhmCj8w_zAO27Y874XFgkkvvuoVtNU8EXpJiVPDKnShMChgFWK7PnBV3QvbRcOYN268OM1yuPY9DK17q4-9-oGuqw_TYIaEECQxe5JpzVXGBtNidMqlMxM902_iqlm5wQnzMjMO8Vuqj5V3MdMdYj9O_rde6dkewGJFWGMZvPHAySCCMoZPoERD5ErPcaRjpyFQp7VjKoUWvG-mBQkBEn7NP93nxb49ZKVqpt_JhQPlk2HTq-yVyXxh_loL1JE6'},\n",
" {'label': 'beinsports',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/3-1',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXExRli0zGmQZlemPPItRH3qShabB-QVHrgUAECeXIs3GUKgd2oIHd45-ULY--TosnkRkiM-XHqZlPxeQlOV6Ktgxb-L5r9Hhf8M-nQS_T0N7NK0BeynreRZtFivuKzwwOByq6uALzoVtombjsREMmsPG7s07CMlMrQjyJCVX8McNdnGC7-mdlHEjdfXN4sgi-YGxdxCdAxaHUaMQxPL0GUUmqDzMMpzVC_lRnrYfuk17UhXI9QhsEi3TMeuUgHu3kl16g1mHA=='},\n",
" {'label': 'uefa',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/3-0',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGKGygrv0aVjWa7JUdwqtuttcPxVIiVFb2_Mxv32q-4AyOVwd8oMKLXq6sl2kw4A37lHLmUUQYqVfDMkX3DLXr4or1Xpx1lnOpIUanPjOtrr2Hk6tPPc0308hdE0xJ5CClC220Tz30xD6538_DOvrVWqfA7pV7x651519Zz37wgqYhN00Ah3LX4QZnW981_-SM8tjVSLDXutPphZBXXmMehNgUynvNd2IiGB9UtkLyGeWINIqR2F7lejStuXJ8U2Q=='},\n",
" {'label': 'uefa',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/3-6',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXH4pOTdyyQxNadaQjyKh8oQuOvZAoJv3h8lUaUpf_DBGcg11x3NZ3be0osuI4NKZmKmtGvI4IXelQLdf0gHIZB2h6x13iHVuz5kCoohIkFmaL2HaKjkQlzZw3KDIAr8j3KoVbWNXnx34wDW2qtFTmECR6UBkLFiy0VEjcYwowJ_8ex10JM14KzcvA=='},\n",
" {'label': 'uefa',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/3-7',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXE1pV-r8eRyTFjoRu12FcAetb5Lb1qPrl-GdPkb649C5b4zo99jtjYGI4y5B2EiF6SE413Ct9omXh3NwD0-r8rGqOMROSYEsfwUaFafM10vFtJGs_eWVcMMLVqgqNELj9BrG4JeBEHbYjDRSlCmVMQcWbIHC28goFDBa-dqi3Q='},\n",
" {'label': 'uefa',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/3-0',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGKGygrv0aVjWa7JUdwqtuttcPxVIiVFb2_Mxv32q-4AyOVwd8oMKLXq6sl2kw4A37lHLmUUQYqVfDMkX3DLXr4or1Xpx1lnOpIUanPjOtrr2Hk6tPPc0308hdE0xJ5CClC220Tz30xD6538_DOvrVWqfA7pV7x651519Zz37wgqYhN00Ah3LX4QZnW981_-SM8tjVSLDXutPphZBXXmMehNgUynvNd2IiGB9UtkLyGeWINIqR2F7lejStuXJ8U2Q=='},\n",
" {'label': 'beinsports',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/3-1',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXExRli0zGmQZlemPPItRH3qShabB-QVHrgUAECeXIs3GUKgd2oIHd45-ULY--TosnkRkiM-XHqZlPxeQlOV6Ktgxb-L5r9Hhf8M-nQS_T0N7NK0BeynreRZtFivuKzwwOByq6uALzoVtombjsREMmsPG7s07CMlMrQjyJCVX8McNdnGC7-mdlHEjdfXN4sgi-YGxdxCdAxaHUaMQxPL0GUUmqDzMMpzVC_lRnrYfuk17UhXI9QhsEi3TMeuUgHu3kl16g1mHA=='},\n",
" {'label': 'thehindu',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/3-2',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEAlCtejIOwzHPUOAXi7oLu469wYzGUJN86oxtrB6YCAHKAocfkxog6XZeXOUjAl9MTY2_jU5igYEOpyy5RZV2jhxGHtahvQGi8Bq0XkJmaFvludGqwpuBn-vFf-MR3As1CXu9GZNh0TW5f3eLPgvDjB6N3IoYaGhGT8BUiqSyZS6k41T-vL9h6fEFMoOFUYhG2S0AfuVZDuyF2nJHJP1WVWZS42csWXEJUDxqhYjyzmx33HaCxKk0Rbe3_Ovc_Kgdagw=='},\n",
" {'label': 'youtube',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-0',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGFcidniPKtBR-_QjSR1P1Oathq_0T9FTwfpCAWZxbXsroItHQU8zRcyOPDgMcvsWoD2fEnwYFKwanV18ep2_cyS5BlHF6-OFNsijWb-peAgsgLAVRiubekRnzMugsYtiWrhZyO3Q=='},\n",
" {'label': 'aljazeera',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-1',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEk7ApC7Y41UOrTWJ40wP2rsT0VDxqhqF-WJEI-FNKW7SNpR7LoA22sRQecS8hZNeZ_-62Vh7X75RmcmZUtnAOuQunrLAsETkkSx5l75dt9ESgTRkIURwtu4Pew7hn8yFz_LY_FJXUpmRfoWP7MWrDfPHcKrOpfmKqONj6mJcASNvAfCZ0p6qK3K4PvKWye6NyBMyYxWCuJig=='},\n",
" {'label': 'foxsports',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-2',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHh_4hBL0Giyuw_cyfT8m7tUSnMqBqH4Lis1CtJICPJNGGLhT6PADTIoUtrj3Rl5qcKNE9T6rzOmedAER_gxJOBDrCF8pnr9lUvhYvmDJxYCJzELkE5rTap4dx6FzOIKZKm1QBp5aHXzd_LCkSTV9ag7Q1A6_t8Vjdbskch6ZG3BoIfjYDQSPgRKDNFAAwt5J07cVFV5pDQzggmM7pxwsUz4drz'},\n",
" {'label': 'wikipedia',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-3',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGd9ZQky3X7RQLbTs6mY1i4Pg7ppcI5H_vtxpvQPiEyD8Qw0f7hjvn3QeoOeAVcCG_pEt5Aeu8ofWCgjwQy4_u6qU-NOOJsYPWOW94XcvtkmKiv46vbNkJF-Mb4OpvBztrDa28BfIdCGHdfF9o='},\n",
" {'label': 'youtube',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-4',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGZc-qDhRx_v3mPelXEfAVmWCpNTa_rzUKundc0pRc7PlTgppymao-_wO7O1oPaAhJYLcZkazIg8T5jA6t9OGgOxUd_Vl88BjouHsot0OK8TlM5hmPf4ECMWGeJthqVwndE3h4wdQ=='},\n",
" {'label': 'uefa',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-5',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXG1Lj9FnmuckfU0k1NC_ThQBZVxFCppp4tPl4FCcM3JZGF9aPvn9ZNFUo0fLfqw4Adt63Cdv8thcFSbsBRcf3rj1sz4LALJvrGfh6OayGo0KJ-UEKmKoOz8cxj5nIILCzKjFh2_0ZgTwrf1pkhhYbnWqj2E8hrVN4S5_sxvlCpLXPxjTsE4R0gYKXH_utqqm1NBkpl3p-C9v6kz-zm6V-JJoePAppIXFICF0DMYjOIBA9Mj0z4yO9Y9Tdgx2oaP'},\n",
" {'label': 'olympics',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-6',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFARil0pwjYQuFrDObawlDzu-eVtUPC4_nINjcXT-mlTL3MDgVPI83UB8gWS1rzGZkaMEmAUIeAzo2ihpMXUsWibzVzeAdQ7nUyqAOq0En87kpfuISduBuWI3__7yJw-vmdApD56-_G2ZhhZC4d_ll2iyNBaZHxxdNqXbb76mUiq99xV0hdoPEkp9RLk7T-uYYfTYXa8oYCXy2ysa9SZDa9hffEHrVe'},\n",
" {'label': 'youtube',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/0-7',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFgwKo5lPes5M_GObnkYEzn3QYn1kpTQpx42ANaNqvNMgRsB1Xp2TIXI82SYTSYuLd9ysgKfmlJJy3lcLxrmNBg1R_Z37PCO9vbqIBIbw6DKqMif7pHdtDTS7FUq69c29hkYb_b5w=='},\n",
" {'label': 'aljazeera',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-0',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFY5CRvcfjdkBz3h8Md_PscguyZ7LtYrxeHHP3eagcmIOnjaMyZbOHFqUAsa2cgkwvb26FZTvGiRgLKNLfiAsH1oP-5kGwnL6Ejhm4ZXhWGg0R3yE_8zkIKde4RgjIXlBvQW4kZ-LI5yhag-ESoh771z6hob8AigAVXT7WeWABMlQNfcbyG_UZIkqAs18U5e6to44ruNbSyDIyd5gobsVpEmdU256oVxa9d7co='},\n",
" {'label': 'coachesvoice',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-1',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHxgpkZWF64tZ8-iypkI2fiFi2cpsj4AFjZXkcYUzf5hSOWYb5etIbCoZd_L6zDJi6mWWisxAO6T5V4T8H7XiRow6dmVqXpSEIKhPSdG0HAQbQK74lwxeV_uXx9fSPllIKPOs2tFNRqTuHdJBNcwpcJp6MJbVLEskyhYnWlyOd9ouQv'},\n",
" {'label': 'aljazeera',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-2',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEV-g6Hxxcan5Xre1yYGM3BtP3fo9uF2zHQ9sVeK_4poD-aBN5CRvhz471beYCC26wdrjhtbiCvDT9dAnPI-ruyqJZhwB3vbKS5HCFb9tPn7Dkj99LpjLXqYyuzbFGsHCbr5SCHoMEhNg--dMU7xB5TiH8HeqKH8B4lk_h00dqhEVQFb05w5TuLtbX1UdXN6NDzHlFN_xyXzOU='},\n",
" {'label': 'wikipedia',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-3',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFNtaBQTFVnSbEW5Bbo8LUIs0h5cv4Pc4aS6Q8qG7jIMCsJPKy5_o6R8x7Z_xQ7AuDEAFlj2JY_AVV1YpwLqtXZxiAyvpfboH_VuMpo6MVbQAu2ZASSSD2slWaIqsUGkTEaPa2z2809z7UhEWUL'},\n",
" {'label': 'newsbytesapp',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/1-5',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFIl5Xc3f44I1nYw_YrJqkByrRl20SiAopZqjfJIK6U62o27CrxLvxaJ4v1M7L5eOfTMMlBCHHYCUooPoG0aObaeRG3YxrcoFT7Xtd4KIrvCS6AWWRpOZasCW-sGtFA56DEDf-qbJ8lsXEJ4GQ386iGTdRkyK9EtJWw1mRpDu7dfPQ6Qy1hNIqTgTdo-3yq1WNmWEl8Xtnag0s='},\n",
" {'label': 'bet9ja',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/2-0',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFgj0MP_IEmC842xTfmMPnbybBGYTUb_wEpwJ58keX5x_qPfUmC7Zz0o6IQeQ8TEqoRpv-Uq6oOqfbazu_aP0fMhP7UrSln6rB4SRvCRC327tM1LNaXpiXN-h6xlg0TN_-AWQORV4PSH7G5u2qD_NaNEWkz_oaEHxj22-qOam52fwRvqISOdoFDNTptlM6t0BbhcA=='},\n",
" {'label': 'uefa',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/3-0',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGKGygrv0aVjWa7JUdwqtuttcPxVIiVFb2_Mxv32q-4AyOVwd8oMKLXq6sl2kw4A37lHLmUUQYqVfDMkX3DLXr4or1Xpx1lnOpIUanPjOtrr2Hk6tPPc0308hdE0xJ5CClC220Tz30xD6538_DOvrVWqfA7pV7x651519Zz37wgqYhN00Ah3LX4QZnW981_-SM8tjVSLDXutPphZBXXmMehNgUynvNd2IiGB9UtkLyGeWINIqR2F7lejStuXJ8U2Q=='},\n",
" {'label': 'beinsports',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/3-1',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXExRli0zGmQZlemPPItRH3qShabB-QVHrgUAECeXIs3GUKgd2oIHd45-ULY--TosnkRkiM-XHqZlPxeQlOV6Ktgxb-L5r9Hhf8M-nQS_T0N7NK0BeynreRZtFivuKzwwOByq6uALzoVtombjsREMmsPG7s07CMlMrQjyJCVX8McNdnGC7-mdlHEjdfXN4sgi-YGxdxCdAxaHUaMQxPL0GUUmqDzMMpzVC_lRnrYfuk17UhXI9QhsEi3TMeuUgHu3kl16g1mHA=='},\n",
" {'label': 'thehindu',\n",
" 'short_url': 'https://vertexaisearch.cloud.google.com/id/3-2',\n",
" 'value': 'https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEAlCtejIOwzHPUOAXi7oLu469wYzGUJN86oxtrB6YCAHKAocfkxog6XZeXOUjAl9MTY2_jU5igYEOpyy5RZV2jhxGHtahvQGi8Bq0XkJmaFvludGqwpuBn-vFf-MR3As1CXu9GZNh0TW5f3eLPgvDjB6N3IoYaGhGT8BUiqSyZS6k41T-vL9h6fEFMoOFUYhG2S0AfuVZDuyF2nJHJP1WVWZS42csWXEJUDxqhYjyzmx33HaCxKk0Rbe3_Ovc_Kgdagw=='}],\n",
" 'initial_search_query_count': 3,\n",
" 'max_research_loops': 3,\n",
" 'research_loop_count': 2}"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"state"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/markdown": [
"Spain won the UEFA Euro 2024 tournament [youtube](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGFcidniPKtBR-_QjSR1P1Oathq_0T9FTwfpCAWZxbXsroItHQU8zRcyOPDgMcvsWoD2fEnwYFKwanV18ep2_cyS5BlHF6-OFNsijWb-peAgsgLAVRiubekRnzMugsYtiWrhZyO3Q==) [aljazeera](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEk7ApC7Y41UOrTWJ40wP2rsT0VDxqhqF-WJEI-FNKW7SNpR7LoA22sRQecS8hZNeZ_-62Vh7X75RmcmZUtnAOuQunrLAsETkkSx5l75dt9ESgTRkIURwtu4Pew7hn8yFz_LY_FJXUpmRfoWP7MWrDfPHcKrOpfmKqONj6mJcASNvAfCZ0p6qK3K4PvKWye6NyBMyYxWCuJig==) [foxsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHh_4hBL0Giyuw_cyfT8m7tUSnMqBqH4Lis1CtJICPJNGGLhT6PADTIoUtrj3Rl5qcKNE9T6rzOmedAER_gxJOBDrCF8pnr9lUvhYvmDJxYCJzELkE5rTap4dx6FzOIKZKm1QBp5aHXzd_LCkSTV9ag7Q1A6_t8Vjdbskch6ZG3BoIfjYDQSPgRKDNFAAwt5J07cVFV5pDQzggmM7pxwsUz4drz) [wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGd9ZQky3X7RQLbTs6mY1i4Pg7ppcI5H_vtxpvQPiEyD8Qw0f7hjvn3QeoOeAVcCG_pEt5Aeu8ofWCgjwQy4_u6qU-NOOJsYPWOW94XcvtkmKiv46vbNkJF-Mb4OpvBztrDa28BfIdCGHdfF9o=) [youtube](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGZc-qDhRx_v3mPelXEfAVmWCpNTa_rzUKundc0pRc7PlTgppymao-_wO7O1oPaAhJYLcZkazIg8T5jA6t9OGgOxUd_Vl88BjouHsot0OK8TlM5hmPf4ECMWGeJthqVwndE3h4wdQ==) [uefa](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXG1Lj9FnmuckfU0k1NC_ThQBZVxFCppp4tPl4FCcM3JZGF9aPvn9ZNFUo0fLfqw4Adt63Cdv8thcFSbsBRcf3rj1sz4LALJvrGfh6OayGo0KJ-UEKmKoOz8cxj5nIILCzKjFh2_0ZgTwrf1pkhhYbnWqj2E8hrVN4S5_sxvlCpLXPxjTsE4R0gYKXH_utqqm1NBkpl3p-C9v6kz-zm6V-JJoePAppIXFICF0DMYjOIBA9Mj0z4yO9Y9Tdgx2oaP) [aljazeera](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFY5CRvcfjdkBz3h8Md_PscguyZ7LtYrxeHHP3eagcmIOnjaMyZbOHFqUAsa2cgkwvb26FZTvGiRgLKNLfiAsH1oP-5kGwnL6Ejhm4ZXhWGg0R3yE_8zkIKde4RgjIXlBvQW4kZ-LI5yhag-ESoh771z6hob8AigAVXT7WeWABMlQNfcbyG_UZIkqAs18U5e6to44ruNbSyDIyd5gobsVpEmdU256oVxa9d7co=) 
[coachesvoice](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHxgpkZWF64tZ8-iypkI2fiFi2cpsj4AFjZXkcYUzf5hSOWYb5etIbCoZd_L6zDJi6mWWisxAO6T5V4T8H7XiRow6dmVqXpSEIKhPSdG0HAQbQK74lwxeV_uXx9fSPllIKPOs2tFNRqTuHdJBNcwpcJp6MJbVLEskyhYnWlyOd9ouQv) [aljazeera](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEV-g6Hxxcan5Xre1yYGM3BtP3fo9uF2zHQ9sVeK_4poD-aBN5CRvhz471beYCC26wdrjhtbiCvDT9dAnPI-ruyqJZhwB3vbKS5HCFb9tPn7Dkj99LpjLXqYyuzbFGsHCbr5SCHoMEhNg--dMU7xB5TiH8HeqKH8B4lk_h00dqhEVQFb05w5TuLtbX1UdXN6NDzHlFN_xyXzOU=) [wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFNtaBQTFVnSbEW5Bbo8LUIs0h5cv4Pc4aS6Q8qG7jIMCsJPKy5_o6R8x7Z_xQ7AuDEAFlj2JY_AVV1YpwLqtXZxiAyvpfboH_VuMpo6MVbQAu2ZASSSD2slWaIqsUGkTEaPa2z2809z7UhEWUL).\n",
"\n",
"In the final match held in Berlin, Germany, Spain defeated England 2-1 [olympics](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFARil0pwjYQuFrDObawlDzu-eVtUPC4_nINjcXT-mlTL3MDgVPI83UB8gWS1rzGZkaMEmAUIeAzo2ihpMXUsWibzVzeAdQ7nUyqAOq0En87kpfuISduBuWI3__7yJw-vmdApD56-_G2ZhhZC4d_ll2iyNBaZHxxdNqXbb76mUiq99xV0hdoPEkp9RLk7T-uYYfTYXa8oYCXy2ysa9SZDa9hffEHrVe) [aljazeera](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEk7ApC7Y41UOrTWJ40wP2rsT0VDxqhqF-WJEI-FNKW7SNpR7LoA22sRQecS8hZNeZ_-62Vh7X75RmcmZUtnAOuQunrLAsETkkSx5l75dt9ESgTRkIURwtu4Pew7hn8yFz_LY_FJXUpmRfoWP7MWrDfPHcKrOpfmKqONj6mJcASNvAfCZ0p6qK3K4PvKWye6NyBMyYxWCuJig==) [foxsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHh_4hBL0Giyuw_cyfT8m7tUSnMqBqH4Lis1CtJICPJNGGLhT6PADTIoUtrj3Rl5qcKNE9T6rzOmedAER_gxJOBDrCF8pnr9lUvhYvmDJxYCJzELkE5rTap4dx6FzOIKZKm1QBp5aHXzd_LCkSTV9ag7Q1A6_t8Vjdbskch6ZG3BoIfjYDQSPgRKDNFAAwt5J07cVFV5pDQzggmM7pxwsUz4drz) [aljazeera](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFY5CRvcfjdkBz3h8Md_PscguyZ7LtYrxeHHP3eagcmIOnjaMyZbOHFqUAsa2cgkwvb26FZTvGiRgLKNLfiAsH1oP-5kGwnL6Ejhm4ZXhWGg0R3yE_8zkIKde4RgjIXlBvQW4kZ-LI5yhag-ESoh771z6hob8AigAVXT7WeWABMlQNfcbyG_UZIkqAs18U5e6to44ruNbSyDIyd5gobsVpEmdU256oVxa9d7co=) [coachesvoice](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHxgpkZWF64tZ8-iypkI2fiFi2cpsj4AFjZXkcYUzf5hSOWYb5etIbCoZd_L6zDJi6mWWisxAO6T5V4T8H7XiRow6dmVqXpSEIKhPSdG0HAQbQK74lwxeV_uXx9fSPllIKPOs2tFNRqTuHdJBNcwpcJp6MJbVLEskyhYnWlyOd9ouQv) [aljazeera](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEV-g6Hxxcan5Xre1yYGM3BtP3fo9uF2zHQ9sVeK_4poD-aBN5CRvhz471beYCC26wdrjhtbiCvDT9dAnPI-ruyqJZhwB3vbKS5HCFb9tPn7Dkj99LpjLXqYyuzbFGsHCbr5SCHoMEhNg--dMU7xB5TiH8HeqKH8B4lk_h00dqhEVQFb05w5TuLtbX1UdXN6NDzHlFN_xyXzOU=) 
[wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFNtaBQTFVnSbEW5Bbo8LUIs0h5cv4Pc4aS6Q8qG7jIMCsJPKy5_o6R8x7Z_xQ7AuDEAFlj2JY_AVV1YpwLqtXZxiAyvpfboH_VuMpo6MVbQAu2ZASSSD2slWaIqsUGkTEaPa2z2809z7UhEWUL). Nico Williams scored the opening goal for Spain, and Mikel Oyarzabal scored the winning goal [youtube](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGFcidniPKtBR-_QjSR1P1Oathq_0T9FTwfpCAWZxbXsroItHQU8zRcyOPDgMcvsWoD2fEnwYFKwanV18ep2_cyS5BlHF6-OFNsijWb-peAgsgLAVRiubekRnzMugsYtiWrhZyO3Q==) [aljazeera](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEk7ApC7Y41UOrTWJ40wP2rsT0VDxqhqF-WJEI-FNKW7SNpR7LoA22sRQecS8hZNeZ_-62Vh7X75RmcmZUtnAOuQunrLAsETkkSx5l75dt9ESgTRkIURwtu4Pew7hn8yFz_LY_FJXUpmRfoWP7MWrDfPHcKrOpfmKqONj6mJcASNvAfCZ0p6qK3K4PvKWye6NyBMyYxWCuJig==) [foxsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHh_4hBL0Giyuw_cyfT8m7tUSnMqBqH4Lis1CtJICPJNGGLhT6PADTIoUtrj3Rl5qcKNE9T6rzOmedAER_gxJOBDrCF8pnr9lUvhYvmDJxYCJzELkE5rTap4dx6FzOIKZKm1QBp5aHXzd_LCkSTV9ag7Q1A6_t8Vjdbskch6ZG3BoIfjYDQSPgRKDNFAAwt5J07cVFV5pDQzggmM7pxwsUz4drz). 
Cole Palmer scored England's only goal [olympics](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFARil0pwjYQuFrDObawlDzu-eVtUPC4_nINjcXT-mlTL3MDgVPI83UB8gWS1rzGZkaMEmAUIeAzo2ihpMXUsWibzVzeAdQ7nUyqAOq0En87kpfuISduBuWI3__7yJw-vmdApD56-_G2ZhhZC4d_ll2iyNBaZHxxdNqXbb76mUiq99xV0hdoPEkp9RLk7T-uYYfTYXa8oYCXy2ysa9SZDa9hffEHrVe) [aljazeera](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEk7ApC7Y41UOrTWJ40wP2rsT0VDxqhqF-WJEI-FNKW7SNpR7LoA22sRQecS8hZNeZ_-62Vh7X75RmcmZUtnAOuQunrLAsETkkSx5l75dt9ESgTRkIURwtu4Pew7hn8yFz_LY_FJXUpmRfoWP7MWrDfPHcKrOpfmKqONj6mJcASNvAfCZ0p6qK3K4PvKWye6NyBMyYxWCuJig==) [foxsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHh_4hBL0Giyuw_cyfT8m7tUSnMqBqH4Lis1CtJICPJNGGLhT6PADTIoUtrj3Rl5qcKNE9T6rzOmedAER_gxJOBDrCF8pnr9lUvhYvmDJxYCJzELkE5rTap4dx6FzOIKZKm1QBp5aHXzd_LCkSTV9ag7Q1A6_t8Vjdbskch6ZG3BoIfjYDQSPgRKDNFAAwt5J07cVFV5pDQzggmM7pxwsUz4drz).\n",
"\n",
"This victory marked Spain's record fourth European Championship title [youtube](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGFcidniPKtBR-_QjSR1P1Oathq_0T9FTwfpCAWZxbXsroItHQU8zRcyOPDgMcvsWoD2fEnwYFKwanV18ep2_cyS5BlHF6-OFNsijWb-peAgsgLAVRiubekRnzMugsYtiWrhZyO3Q==) [aljazeera](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEk7ApC7Y41UOrTWJ40wP2rsT0VDxqhqF-WJEI-FNKW7SNpR7LoA22sRQecS8hZNeZ_-62Vh7X75RmcmZUtnAOuQunrLAsETkkSx5l75dt9ESgTRkIURwtu4Pew7hn8yFz_LY_FJXUpmRfoWP7MWrDfPHcKrOpfmKqONj6mJcASNvAfCZ0p6qK3K4PvKWye6NyBMyYxWCuJig==) [foxsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHh_4hBL0Giyuw_cyfT8m7tUSnMqBqH4Lis1CtJICPJNGGLhT6PADTIoUtrj3Rl5qcKNE9T6rzOmedAER_gxJOBDrCF8pnr9lUvhYvmDJxYCJzELkE5rTap4dx6FzOIKZKm1QBp5aHXzd_LCkSTV9ag7Q1A6_t8Vjdbskch6ZG3BoIfjYDQSPgRKDNFAAwt5J07cVFV5pDQzggmM7pxwsUz4drz) [wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGd9ZQky3X7RQLbTs6mY1i4Pg7ppcI5H_vtxpvQPiEyD8Qw0f7hjvn3QeoOeAVcCG_pEt5Aeu8ofWCgjwQy4_u6qU-NOOJsYPWOW94XcvtkmKiv46vbNkJF-Mb4OpvBztrDa28BfIdCGHdfF9o=) [youtube](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGZc-qDhRx_v3mPelXEfAVmWCpNTa_rzUKundc0pRc7PlTgppymao-_wO7O1oPaAhJYLcZkazIg8T5jA6t9OGgOxUd_Vl88BjouHsot0OK8TlM5hmPf4ECMWGeJthqVwndE3h4wdQ==) [uefa](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXG1Lj9FnmuckfU0k1NC_ThQBZVxFCppp4tPl4FCcM3JZGF9aPvn9ZNFUo0fLfqw4Adt63Cdv8thcFSbsBRcf3rj1sz4LALJvrGfh6OayGo0KJ-UEKmKoOz8cxj5nIILCzKjFh2_0ZgTwrf1pkhhYbnWqj2E8hrVN4S5_sxvlCpLXPxjTsE4R0gYKXH_utqqm1NBkpl3p-C9v6kz-zm6V-JJoePAppIXFICF0DMYjOIBA9Mj0z4yO9Y9Tdgx2oaP) [aljazeera](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFY5CRvcfjdkBz3h8Md_PscguyZ7LtYrxeHHP3eagcmIOnjaMyZbOHFqUAsa2cgkwvb26FZTvGiRgLKNLfiAsH1oP-5kGwnL6Ejhm4ZXhWGg0R3yE_8zkIKde4RgjIXlBvQW4kZ-LI5yhag-ESoh771z6hob8AigAVXT7WeWABMlQNfcbyG_UZIkqAs18U5e6to44ruNbSyDIyd5gobsVpEmdU256oVxa9d7co=) 
[coachesvoice](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHxgpkZWF64tZ8-iypkI2fiFi2cpsj4AFjZXkcYUzf5hSOWYb5etIbCoZd_L6zDJi6mWWisxAO6T5V4T8H7XiRow6dmVqXpSEIKhPSdG0HAQbQK74lwxeV_uXx9fSPllIKPOs2tFNRqTuHdJBNcwpcJp6MJbVLEskyhYnWlyOd9ouQv) [aljazeera](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEV-g6Hxxcan5Xre1yYGM3BtP3fo9uF2zHQ9sVeK_4poD-aBN5CRvhz471beYCC26wdrjhtbiCvDT9dAnPI-ruyqJZhwB3vbKS5HCFb9tPn7Dkj99LpjLXqYyuzbFGsHCbr5SCHoMEhNg--dMU7xB5TiH8HeqKH8B4lk_h00dqhEVQFb05w5TuLtbX1UdXN6NDzHlFN_xyXzOU=) [wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFNtaBQTFVnSbEW5Bbo8LUIs0h5cv4Pc4aS6Q8qG7jIMCsJPKy5_o6R8x7Z_xQ7AuDEAFlj2JY_AVV1YpwLqtXZxiAyvpfboH_VuMpo6MVbQAu2ZASSSD2slWaIqsUGkTEaPa2z2809z7UhEWUL). Spain achieved this by winning all seven of their matches throughout the tournament [youtube](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFgwKo5lPes5M_GObnkYEzn3QYn1kpTQpx42ANaNqvNMgRsB1Xp2TIXI82SYTSYuLd9ysgKfmlJJy3lcLxrmNBg1R_Z37PCO9vbqIBIbw6DKqMif7pHdtDTS7FUq69c29hkYb_b5w==) [wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGd9ZQky3X7RQLbTs6mY1i4Pg7ppcI5H_vtxpvQPiEyD8Qw0f7hjvn3QeoOeAVcCG_pEt5Aeu8ofWCgjwQy4_u6qU-NOOJsYPWOW94XcvtkmKiv46vbNkJF-Mb4OpvBztrDa28BfIdCGHdfF9o=) [youtube](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGZc-qDhRx_v3mPelXEfAVmWCpNTa_rzUKundc0pRc7PlTgppymao-_wO7O1oPaAhJYLcZkazIg8T5jA6t9OGgOxUd_Vl88BjouHsot0OK8TlM5hmPf4ECMWGeJthqVwndE3h4wdQ==) [uefa](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXG1Lj9FnmuckfU0k1NC_ThQBZVxFCppp4tPl4FCcM3JZGF9aPvn9ZNFUo0fLfqw4Adt63Cdv8thcFSbsBRcf3rj1sz4LALJvrGfh6OayGo0KJ-UEKmKoOz8cxj5nIILCzKjFh2_0ZgTwrf1pkhhYbnWqj2E8hrVN4S5_sxvlCpLXPxjTsE4R0gYKXH_utqqm1NBkpl3p-C9v6kz-zm6V-JJoePAppIXFICF0DMYjOIBA9Mj0z4yO9Y9Tdgx2oaP) 
[wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFNtaBQTFVnSbEW5Bbo8LUIs0h5cv4Pc4aS6Q8qG7jIMCsJPKy5_o6R8x7Z_xQ7AuDEAFlj2JY_AVV1YpwLqtXZxiAyvpfboH_VuMpo6MVbQAu2ZASSSD2slWaIqsUGkTEaPa2z2809z7UhEWUL) [newsbytesapp](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFIl5Xc3f44I1nYw_YrJqkByrRl20SiAopZqjfJIK6U62o27CrxLvxaJ4v1M7L5eOfTMMlBCHHYCUooPoG0aObaeRG3YxrcoFT7Xtd4KIrvCS6AWWRpOZasCW-sGtFA56DEDf-qbJ8lsXEJ4GQ386iGTdRkyK9EtJWw1mRpDu7dfPQ6Qy1hNIqTgTdo-3yq1WNmWEl8Xtnag0s=).\n",
"\n",
"Key individual awards for the tournament went to Spain's players: Rodri was named the Best Player, and Lamine Yamal was named the Best Young Player [wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEk7ApC7Y41UOrTWJ40wP2rsT0VDxqhqF-WJEI-FNKW7SNpR7LoA22sRQecS8hZNeZ_-62Vh7X75RmcmZUtnAOuQunrLAsETkkSx5l75dt9ESgTRkIURwtu4Pew7hn8yFz_LY_FJXUpmRfoWP7MWrDfPHcKrOpfmKqONj6mJcASNvAfCZ0p6qK3K4PvKWye6NyBMyYxWCuJig==0) [bet9ja](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFgj0MP_IEmC842xTfmMPnbybBGYTUb_wEpwJ58keX5x_qPfUmC7Zz0o6IQeQ8TEqoRpv-Uq6oOqfbazu_aP0fMhP7UrSln6rB4SRvCRC327tM1LNaXpiXN-h6xlg0TN_-AWQORV4PSH7G5u2qD_NaNEWkz_oaEHxj22-qOam52fwRvqISOdoFDNTptlM6t0BbhcA==) [uefa](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGKGygrv0aVjWa7JUdwqtuttcPxVIiVFb2_Mxv32q-4AyOVwd8oMKLXq6sl2kw4A37lHLmUUQYqVfDMkX3DLXr4or1Xpx1lnOpIUanPjOtrr2Hk6tPPc0308hdE0xJ5CClC220Tz30xD6538_DOvrVWqfA7pV7x651519Zz37wgqYhN00Ah3LX4QZnW981_-SM8tjVSLDXutPphZBXXmMehNgUynvNd2IiGB9UtkLyGeWINIqR2F7lejStuXJ8U2Q==) [beinsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXExRli0zGmQZlemPPItRH3qShabB-QVHrgUAECeXIs3GUKgd2oIHd45-ULY--TosnkRkiM-XHqZlPxeQlOV6Ktgxb-L5r9Hhf8M-nQS_T0N7NK0BeynreRZtFivuKzwwOByq6uALzoVtombjsREMmsPG7s07CMlMrQjyJCVX8McNdnGC7-mdlHEjdfXN4sgi-YGxdxCdAxaHUaMQxPL0GUUmqDzMMpzVC_lRnrYfuk17UhXI9QhsEi3TMeuUgHu3kl16g1mHA==) [thehindu](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEAlCtejIOwzHPUOAXi7oLu469wYzGUJN86oxtrB6YCAHKAocfkxog6XZeXOUjAl9MTY2_jU5igYEOpyy5RZV2jhxGHtahvQGi8Bq0XkJmaFvludGqwpuBn-vFf-MR3As1CXu9GZNh0TW5f3eLPgvDjB6N3IoYaGhGT8BUiqSyZS6k41T-vL9h6fEFMoOFUYhG2S0AfuVZDuyF2nJHJP1WVWZS42csWXEJUDxqhYjyzmx33HaCxKk0Rbe3_Ovc_Kgdagw==)."
],
"text/plain": [
""
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from IPython.display import Markdown\n",
"\n",
"Markdown(state[\"messages\"][-1].content)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"state = graph.invoke({\"messages\": state[\"messages\"] + [{\"role\": \"user\", \"content\": \"Who has the most titles? List the top 5\"}]})"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/markdown": [
"Based on the number of UEFA European Championship titles won, Spain holds the record [olympics](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHoAXOG7_3DUjYvRr_buN8IAL3xE5yQoPetZCb1KlcaMOgJEE5BeBoqQEVVkDZLDpwgTmFkPYeWS7i_D23Vd5bKzUTfc0HSLI481VbXjMD9ECeZRFZ17g3xAYLg5I0QU34RWLCRcV_zgphUsJZ0L5gXjpYz5gl8syuYAX3VkHCwh0x6Wqau4er_cZ56CoiA-3S_r2I=) [sportsadda](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEYq2a0benYn0vF2WrfvmqEsgwriQ08aVcDdpS1MUjBxlzaV_scV0ldVeUpwqcgVLCfxgX3oVmbUxbkFPzeHbknsAbxLFk4Iyvtxgacx54AZBnL1szGQ9cQQGOOT8f-zGZhzKWEhAIOYTsz89uAr55R546MlC31OFXiU7AGhMgLi0Ekk6wQvPJVTWs_TiaG4MHoHo0obaRhJK1iPYaAxqHKD2Zf5rTr2jmdPBPd9w==) [wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXG_0rXuWu0vsaIzCzaUG9Rw0L65I3o3RWhCp4gzXiHDZW3GaJXLntEQNi-O88mGf5LlE0tAkMNd_5VBNOkzIxAkbVsdkpPjwtzuY1sjv2gjtHLnvbIa8Y9jFbdS8kE3xqj95_TayzxNWpyr-XkwY9qLW4NI) [yahoo](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEN_wi1zong77ArtYg3CR5q-wmd_1KK_G72jrRoZv8_QlUFZUvjqyDXL5Co8RzGzWRzP9To5pq0kSBorMh3qgNVcOLGuCbF3giTuegc9yb0k9JqiOTFNYINKJQNAAmMo2ZziC5Iu_F1S5cpLJkKgCXLKk3VLkhqY-hSV1h_ryZRIMjENlsw7Q0=) [sportingnews](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFAkEqJsPiiUSSUWhHm2Qtc30qed2JljSZ9Nu4JrcJ--CkX_Rif1AO5L0Kxl09j6yo8n5MS9NWdFsXi7KRIg5EJL0d0jm8YA-E4sllbJojNNQDwII8cb1A3b9b5RP3JoFTp2xEYQu914rrEFmRmjsFb44LU8bgGFJijrBG237B67YqLXiQThCPZjP-Gq3BKv3cTxZKseIXSRjaxosiM4LDpxDxZPAdpeIIpb3aiH7w_IyC8dWXpCyoHZYZfYe6EGNXfkmE=) [foxsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXF2tzG9OJk8Y6nPflMmiUEr826naRNP0ncayg9rczFwi4d_IOq9k99b_7K4ISJPMpAOTzV_VCw8H33rEC6z2N99GWxlB7evrGw__IwY8ZILaE4kYzojFvnmrvRwEdAQsRU2xkUH2AM_VDc6bXduEQBjjkHRi1XFwuY5OvVbGImLznn0dEidu45aQ0Kq) 
[sportingnews](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGCVmrxZzE4ELt_Nl2fd2hYXp7QBGE7bqB8y4I0CRyiGrIB4dMu7krUBEuukEN1Go8KCJme8GuKl3dNqiW-UD7oR6MNWR47NKy1wqoNBxzrgZ2q1nnmjCAqxSeNejx_KTeAkiRVjUBXOQApJL_gUDI4Lwl3BCGcmlNVZGcuP88YN09iuu_stghqGJ7ulc6rUpHEZy9w0SpxUzexpKo27306oSXBPvvYVX6VnltFWvASMRsDfmD3dc94t715Ig04wFvaSIicHg==) [nbcsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHgdp86b29MgI2_Kvo4j5P0Iz8d3W8YgVkRwx31hZAoC9_dL-zyj_jP1vbDkxBiCA8kYArwSMPRVetYZR9WYYAwm3VwowkVtH7slpfObvQLnlHb2SQ386cBdZeZBZmEhgvVFE07YR4Z83RgohnOi26cW1BsZiRYlm1Adh1pgWtiNeiUl7ZNUMxtMQ5XvBx0GM1FpBd1QPsnhjU-pwNz-7ETG_XhsC7ocHEgWyMozF0cJOsEoR-Uye62Q0M=).\n",
"\n",
"Here are the top countries ranked by the number of UEFA European Championship titles:\n",
"\n",
"1. **Spain:** 4 titles (1964, 2008, 2012, 2024) [olympics](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHoAXOG7_3DUjYvRr_buN8IAL3xE5yQoPetZCb1KlcaMOgJEE5BeBoqQEVVkDZLDpwgTmFkPYeWS7i_D23Vd5bKzUTfc0HSLI481VbXjMD9ECeZRFZ17g3xAYLg5I0QU34RWLCRcV_zgphUsJZ0L5gXjpYz5gl8syuYAX3VkHCwh0x6Wqau4er_cZ56CoiA-3S_r2I=) [sportsadda](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEYq2a0benYn0vF2WrfvmqEsgwriQ08aVcDdpS1MUjBxlzaV_scV0ldVeUpwqcgVLCfxgX3oVmbUxbkFPzeHbknsAbxLFk4Iyvtxgacx54AZBnL1szGQ9cQQGOOT8f-zGZhzKWEhAIOYTsz89uAr55R546MlC31OFXiU7AGhMgLi0Ekk6wQvPJVTWs_TiaG4MHoHo0obaRhJK1iPYaAxqHKD2Zf5rTr2jmdPBPd9w==) [wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXG_0rXuWu0vsaIzCzaUG9Rw0L65I3o3RWhCp4gzXiHDZW3GaJXLntEQNi-O88mGf5LlE0tAkMNd_5VBNOkzIxAkbVsdkpPjwtzuY1sjv2gjtHLnvbIa8Y9jFbdS8kE3xqj95_TayzxNWpyr-XkwY9qLW4NI) [yahoo](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEN_wi1zong77ArtYg3CR5q-wmd_1KK_G72jrRoZv8_QlUFZUvjqyDXL5Co8RzGzWRzP9To5pq0kSBorMh3qgNVcOLGuCbF3giTuegc9yb0k9JqiOTFNYINKJQNAAmMo2ZziC5Iu_F1S5cpLJkKgCXLKk3VLkhqY-hSV1h_ryZRIMjENlsw7Q0=) [sportingnews](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFAkEqJsPiiUSSUWhHm2Qtc30qed2JljSZ9Nu4JrcJ--CkX_Rif1AO5L0Kxl09j6yo8n5MS9NWdFsXi7KRIg5EJL0d0jm8YA-E4sllbJojNNQDwII8cb1A3b9b5RP3JoFTp2xEYQu914rrEFmRmjsFb44LU8bgGFJijrBG237B67YqLXiQThCPZjP-Gq3BKv3cTxZKseIXSRjaxosiM4LDpxDxZPAdpeIIpb3aiH7w_IyC8dWXpCyoHZYZfYe6EGNXfkmE=) [foxsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXF2tzG9OJk8Y6nPflMmiUEr826naRNP0ncayg9rczFwi4d_IOq9k99b_7K4ISJPMpAOTzV_VCw8H33rEC6z2N99GWxlB7evrGw__IwY8ZILaE4kYzojFvnmrvRwEdAQsRU2xkUH2AM_VDc6bXduEQBjjkHRi1XFwuY5OvVbGImLznn0dEidu45aQ0Kq) 
[sportingnews](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXGCVmrxZzE4ELt_Nl2fd2hYXp7QBGE7bqB8y4I0CRyiGrIB4dMu7krUBEuukEN1Go8KCJme8GuKl3dNqiW-UD7oR6MNWR47NKy1wqoNBxzrgZ2q1nnmjCAqxSeNejx_KTeAkiRVjUBXOQApJL_gUDI4Lwl3BCGcmlNVZGcuP88YN09iuu_stghqGJ7ulc6rUpHEZy9w0SpxUzexpKo27306oSXBPvvYVX6VnltFWvASMRsDfmD3dc94t715Ig04wFvaSIicHg==) [nbcsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHgdp86b29MgI2_Kvo4j5P0Iz8d3W8YgVkRwx31hZAoC9_dL-zyj_jP1vbDkxBiCA8kYArwSMPRVetYZR9WYYAwm3VwowkVtH7slpfObvQLnlHb2SQ386cBdZeZBZmEhgvVFE07YR4Z83RgohnOi26cW1BsZiRYlm1Adh1pgWtiNeiUl7ZNUMxtMQ5XvBx0GM1FpBd1QPsnhjU-pwNz-7ETG_XhsC7ocHEgWyMozF0cJOsEoR-Uye62Q0M=). Spain is also the only nation to have won consecutive titles (2008 and 2012) [sportsadda](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEYq2a0benYn0vF2WrfvmqEsgwriQ08aVcDdpS1MUjBxlzaV_scV0ldVeUpwqcgVLCfxgX3oVmbUxbkFPzeHbknsAbxLFk4Iyvtxgacx54AZBnL1szGQ9cQQGOOT8f-zGZhzKWEhAIOYTsz89uAr55R546MlC31OFXiU7AGhMgLi0Ekk6wQvPJVTWs_TiaG4MHoHo0obaRhJK1iPYaAxqHKD2Zf5rTr2jmdPBPd9w==) [topendsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFvGr1KOv5qWkUO63kL5-QFEKn41IArXdrcLcMuaCc69bmwu_VsGzE7QI4scHdLjQxYxoFD3eg4ZflqzFcnNk7UJKM5cT8IR13LrrWodcNzotVidnczmVCFCd1-w10ixHS2rgykLdSr8UqFNJ88T2hZL-HL6YCLUUAXJjFP) [wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXG_0rXuWu0vsaIzCzaUG9Rw0L65I3o3RWhCp4gzXiHDZW3GaJXLntEQNi-O88mGf5LlE0tAkMNd_5VBNOkzIxAkbVsdkpPjwtzuY1sjv2gjtHLnvbIa8Y9jFbdS8kE3xqj95_TayzxNWpyr-XkwY9qLW4NI) [topendsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHD7IG-bFCCNoc-2iRxXvo468klJLjiQmPdQCkRubtvT83i-Xbpg5XKxyLQB9Yc7qVwRuLjHIB37ywnZ8fdT3fM2ydpLggvdTGxAUVL1M0havCvEQpxiqcmS9LaBnOqWnMOWyy_ztdfTrVihPRb0chKtGeDHA-2EMQlW9ge) 
[foxsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXF2tzG9OJk8Y6nPflMmiUEr826naRNP0ncayg9rczFwi4d_IOq9k99b_7K4ISJPMpAOTzV_VCw8H33rEC6z2N99GWxlB7evrGw__IwY8ZILaE4kYzojFvnmrvRwEdAQsRU2xkUH2AM_VDc6bXduEQBjjkHRi1XFwuY5OvVbGImLznn0dEidu45aQ0Kq) [nbcsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHgdp86b29MgI2_Kvo4j5P0Iz8d3W8YgVkRwx31hZAoC9_dL-zyj_jP1vbDkxBiCA8kYArwSMPRVetYZR9WYYAwm3VwowkVtH7slpfObvQLnlHb2SQ386cBdZeZBZmEhgvVFE07YR4Z83RgohnOi26cW1BsZiRYlm1Adh1pgWtiNeiUl7ZNUMxtMQ5XvBx0GM1FpBd1QPsnhjU-pwNz-7ETG_XhsC7ocHEgWyMozF0cJOsEoR-Uye62Q0M=).\n",
"2. **Germany:** 3 titles (1972, 1980, 1996) [olympics](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHoAXOG7_3DUjYvRr_buN8IAL3xE5yQoPetZCb1KlcaMOgJEE5BeBoqQEVVkDZLDpwgTmFkPYeWS7i_D23Vd5bKzUTfc0HSLI481VbXjMD9ECeZRFZ17g3xAYLg5I0QU34RWLCRcV_zgphUsJZ0L5gXjpYz5gl8syuYAX3VkHCwh0x6Wqau4er_cZ56CoiA-3S_r2I=) [sportsadda](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEYq2a0benYn0vF2WrfvmqEsgwriQ08aVcDdpS1MUjBxlzaV_scV0ldVeUpwqcgVLCfxgX3oVmbUxbkFPzeHbknsAbxLFk4Iyvtxgacx54AZBnL1szGQ9cQQGOOT8f-zGZhzKWEhAIOYTsz89uAr55R546MlC31OFXiU7AGhMgLi0Ekk6wQvPJVTWs_TiaG4MHoHo0obaRhJK1iPYaAxqHKD2Zf5rTr2jmdPBPd9w==) [wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXG_0rXuWu0vsaIzCzaUG9Rw0L65I3o3RWhCp4gzXiHDZW3GaJXLntEQNi-O88mGf5LlE0tAkMNd_5VBNOkzIxAkbVsdkpPjwtzuY1sjv2gjtHLnvbIa8Y9jFbdS8kE3xqj95_TayzxNWpyr-XkwY9qLW4NI) [wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFXDSskdzRBS66rtxun9egUAz7spzUcDjq30yCoOdFr_SzKkrXBLgAPbzZKVQhx-Z28pOO3phMnr-qIhLuS9zJrp0MTCyIohI6EYxlJ3DpFXTNxneDn9OzNs7sZX_LwKKYA2E-7Mjr46dqZuprKzRn9amiPHusHo3dRWKpOzMSfhXpO) [topendsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHD7IG-bFCCNoc-2iRxXvo468klJLjiQmPdQCkRubtvT83i-Xbpg5XKxyLQB9Yc7qVwRuLjHIB37ywnZ8fdT3fM2ydpLggvdTGxAUVL1M0havCvEQpxiqcmS9LaBnOqWnMOWyy_ztdfTrVihPRb0chKtGeDHA-2EMQlW9ge) [yahoo](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEN_wi1zong77ArtYg3CR5q-wmd_1KK_G72jrRoZv8_QlUFZUvjqyDXL5Co8RzGzWRzP9To5pq0kSBorMh3qgNVcOLGuCbF3giTuegc9yb0k9JqiOTFNYINKJQNAAmMo2ZziC5Iu_F1S5cpLJkKgCXLKk3VLkhqY-hSV1h_ryZRIMjENlsw7Q0=) [sportingnews](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFAkEqJsPiiUSSUWhHm2Qtc30qed2JljSZ9Nu4JrcJ--CkX_Rif1AO5L0Kxl09j6yo8n5MS9NWdFsXi7KRIg5EJL0d0jm8YA-E4sllbJojNNQDwII8cb1A3b9b5RP3JoFTp2xEYQu914rrEFmRmjsFb44LU8bgGFJijrBG237B67YqLXiQThCPZjP-Gq3BKv3cTxZKseIXSRjaxosiM4LDpxDxZPAdpeIIpb3aiH7w_IyC8dWXpCyoHZYZfYe6EGNXfkmE=) 
[foxsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXF2tzG9OJk8Y6nPflMmiUEr826naRNP0ncayg9rczFwi4d_IOq9k99b_7K4ISJPMpAOTzV_VCw8H33rEC6z2N99GWxlB7evrGw__IwY8ZILaE4kYzojFvnmrvRwEdAQsRU2xkUH2AM_VDc6bXduEQBjjkHRi1XFwuY5OvVbGImLznn0dEidu45aQ0Kq) [sportskeeda](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFIIs54t2O4RL3Y8cvfgbTs_CjcMeedsitpXc8PsaUZXOA7sohYdYTdTiN1-kLAhtXi2UZT4A-iIPZ6ufIpvuD_53Qtr4zqZnqZ6ox74EgyOPyjxs9k1qS_Gq1kR57IdjnMG4JbC7y9nVq2xZRNevSC-PJLSfCoLc36ahu6Xp6Fssl1Yw8LjgX2ranBbG72OvyijpJj1UygG-SVqr7h0y-DECQ=) [youtube](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXETMGJq-qIccM3rUu2XPme69mRXj51ItevakpVZcxWa26F74sDgeP3slSuSCccKFyv9Xx5P1r4-3kY4ckWQclfnA3leE1ctTGdnIn-5GBRQrjxIwNSlKADP46pBTqgg_LhybRo2at4=). (Note: The first two titles were won as West Germany) [sportsadda](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEYq2a0benYn0vF2WrfvmqEsgwriQ08aVcDdpS1MUjBxlzaV_scV0ldVeUpwqcgVLCfxgX3oVmbUxbkFPzeHbknsAbxLFk4Iyvtxgacx54AZBnL1szGQ9cQQGOOT8f-zGZhzKWEhAIOYTsz89uAr55R546MlC31OFXiU7AGhMgLi0Ekk6wQvPJVTWs_TiaG4MHoHo0obaRhJK1iPYaAxqHKD2Zf5rTr2jmdPBPd9w==) [byfarthegreatestteam](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHbs0yJwq86sGLUvo-8IgZfAus8ZnQcTe_dDlaZhfp7awMYMvU3mq8EL4VYyYWhahCYeRlACi-5eOq4maub0x3GsfKHvhgLBe8KOJG7f-CaPLRKjmRFPRNS2sig2ZCw6VXQeyLq_JWgmbIof5uLvNFqs_qOwFwqZvSmqWKbEnrw2dXMdEmsLKprFbsvAwd9GdMzFr9HojBPpSSk95SzVt2a849gqAGFyIq89tv-mirQaMRqwVUL).\n",
"3. **Italy:** 2 titles (1968, 2021) [olympics](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHoAXOG7_3DUjYvRr_buN8IAL3xE5yQoPetZCb1KlcaMOgJEE5BeBoqQEVVkDZLDpwgTmFkPYeWS7i_D23Vd5bKzUTfc0HSLI481VbXjMD9ECeZRFZ17g3xAYLg5I0QU34RWLCRcV_zgphUsJZ0L5gXjpYz5gl8syuYAX3VkHCwh0x6Wqau4er_cZ56CoiA-3S_r2I=) [topendsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFvGr1KOv5qWkUO63kL5-QFEKn41IArXdrcLcMuaCc69bmwu_VsGzE7QI4scHdLjQxYxoFD3eg4ZflqzFcnNk7UJKM5cT8IR13LrrWodcNzotVidnczmVCFCd1-w10ixHS2rgykLdSr8UqFNJ88T2hZL-HL6YCLUUAXJjFP) [wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXG_0rXuWu0vsaIzCzaUG9Rw0L65I3o3RWhCp4gzXiHDZW3GaJXLntEQNi-O88mGf5LlE0tAkMNd_5VBNOkzIxAkbVsdkpPjwtzuY1sjv2gjtHLnvbIa8Y9jFbdS8kE3xqj95_TayzxNWpyr-XkwY9qLW4NI) [wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFXDSskdzRBS66rtxun9egUAz7spzUcDjq30yCoOdFr_SzKkrXBLgAPbzZKVQhx-Z28pOO3phMnr-qIhLuS9zJrp0MTCyIohI6EYxlJ3DpFXTNxneDn9OzNs7sZX_LwKKYA2E-7Mjr46dqZuprKzRn9amiPHusHo3dRWKpOzMSfhXpO) [topendsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHD7IG-bFCCNoc-2iRxXvo468klJLjiQmPdQCkRubtvT83i-Xbpg5XKxyLQB9Yc7qVwRuLjHIB37ywnZ8fdT3fM2ydpLggvdTGxAUVL1M0havCvEQpxiqcmS9LaBnOqWnMOWyy_ztdfTrVihPRb0chKtGeDHA-2EMQlW9ge) [sportingnews](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFAkEqJsPiiUSSUWhHm2Qtc30qed2JljSZ9Nu4JrcJ--CkX_Rif1AO5L0Kxl09j6yo8n5MS9NWdFsXi7KRIg5EJL0d0jm8YA-E4sllbJojNNQDwII8cb1A3b9b5RP3JoFTp2xEYQu914rrEFmRmjsFb44LU8bgGFJijrBG237B67YqLXiQThCPZjP-Gq3BKv3cTxZKseIXSRjaxosiM4LDpxDxZPAdpeIIpb3aiH7w_IyC8dWXpCyoHZYZfYe6EGNXfkmE=) [foxsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXF2tzG9OJk8Y6nPflMmiUEr826naRNP0ncayg9rczFwi4d_IOq9k99b_7K4ISJPMpAOTzV_VCw8H33rEC6z2N99GWxlB7evrGw__IwY8ZILaE4kYzojFvnmrvRwEdAQsRU2xkUH2AM_VDc6bXduEQBjjkHRi1XFwuY5OvVbGImLznn0dEidu45aQ0Kq) 
[sportskeeda](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFIIs54t2O4RL3Y8cvfgbTs_CjcMeedsitpXc8PsaUZXOA7sohYdYTdTiN1-kLAhtXi2UZT4A-iIPZ6ufIpvuD_53Qtr4zqZnqZ6ox74EgyOPyjxs9k1qS_Gq1kR57IdjnMG4JbC7y9nVq2xZRNevSC-PJLSfCoLc36ahu6Xp6Fssl1Yw8LjgX2ranBbG72OvyijpJj1UygG-SVqr7h0y-DECQ=).\n",
"4. **France:** 2 titles (1984, 2000) [olympics](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHoAXOG7_3DUjYvRr_buN8IAL3xE5yQoPetZCb1KlcaMOgJEE5BeBoqQEVVkDZLDpwgTmFkPYeWS7i_D23Vd5bKzUTfc0HSLI481VbXjMD9ECeZRFZ17g3xAYLg5I0QU34RWLCRcV_zgphUsJZ0L5gXjpYz5gl8syuYAX3VkHCwh0x6Wqau4er_cZ56CoiA-3S_r2I=) [topendsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFvGr1KOv5qWkUO63kL5-QFEKn41IArXdrcLcMuaCc69bmwu_VsGzE7QI4scHdLjQxYxoFD3eg4ZflqzFcnNk7UJKM5cT8IR13LrrWodcNzotVidnczmVCFCd1-w10ixHS2rgykLdSr8UqFNJ88T2hZL-HL6YCLUUAXJjFP) [wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXG_0rXuWu0vsaIzCzaUG9Rw0L65I3o3RWhCp4gzXiHDZW3GaJXLntEQNi-O88mGf5LlE0tAkMNd_5VBNOkzIxAkbVsdkpPjwtzuY1sjv2gjtHLnvbIa8Y9jFbdS8kE3xqj95_TayzxNWpyr-XkwY9qLW4NI) [wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFXDSskdzRBS66rtxun9egUAz7spzUcDjq30yCoOdFr_SzKkrXBLgAPbzZKVQhx-Z28pOO3phMnr-qIhLuS9zJrp0MTCyIohI6EYxlJ3DpFXTNxneDn9OzNs7sZX_LwKKYA2E-7Mjr46dqZuprKzRn9amiPHusHo3dRWKpOzMSfhXpO) [topendsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHD7IG-bFCCNoc-2iRxXvo468klJLjiQmPdQCkRubtvT83i-Xbpg5XKxyLQB9Yc7qVwRuLjHIB37ywnZ8fdT3fM2ydpLggvdTGxAUVL1M0havCvEQpxiqcmS9LaBnOqWnMOWyy_ztdfTrVihPRb0chKtGeDHA-2EMQlW9ge) [sportingnews](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFAkEqJsPiiUSSUWhHm2Qtc30qed2JljSZ9Nu4JrcJ--CkX_Rif1AO5L0Kxl09j6yo8n5MS9NWdFsXi7KRIg5EJL0d0jm8YA-E4sllbJojNNQDwII8cb1A3b9b5RP3JoFTp2xEYQu914rrEFmRmjsFb44LU8bgGFJijrBG237B67YqLXiQThCPZjP-Gq3BKv3cTxZKseIXSRjaxosiM4LDpxDxZPAdpeIIpb3aiH7w_IyC8dWXpCyoHZYZfYe6EGNXfkmE=) [youtube](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXETMGJq-qIccM3rUu2XPme69mRXj51ItevakpVZcxWa26F74sDgeP3slSuSCccKFyv9Xx5P1r4-3kY4ckWQclfnA3leE1ctTGdnIn-5GBRQrjxIwNSlKADP46pBTqgg_LhybRo2at4=).\n",
"5. **Tied with 1 title each:**\n",
" * Soviet Union (1960) [olympics](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHoAXOG7_3DUjYvRr_buN8IAL3xE5yQoPetZCb1KlcaMOgJEE5BeBoqQEVVkDZLDpwgTmFkPYeWS7i_D23Vd5bKzUTfc0HSLI481VbXjMD9ECeZRFZ17g3xAYLg5I0QU34RWLCRcV_zgphUsJZ0L5gXjpYz5gl8syuYAX3VkHCwh0x6Wqau4er_cZ56CoiA-3S_r2I=) [topendsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFvGr1KOv5qWkUO63kL5-QFEKn41IArXdrcLcMuaCc69bmwu_VsGzE7QI4scHdLjQxYxoFD3eg4ZflqzFcnNk7UJKM5cT8IR13LrrWodcNzotVidnczmVCFCd1-w10ixHS2rgykLdSr8UqFNJ88T2hZL-HL6YCLUUAXJjFP) [wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXG_0rXuWu0vsaIzCzaUG9Rw0L65I3o3RWhCp4gzXiHDZW3GaJXLntEQNi-O88mGf5LlE0tAkMNd_5VBNOkzIxAkbVsdkpPjwtzuY1sjv2gjtHLnvbIa8Y9jFbdS8kE3xqj95_TayzxNWpyr-XkwY9qLW4NI) [wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFXDSskdzRBS66rtxun9egUAz7spzUcDjq30yCoOdFr_SzKkrXBLgAPbzZKVQhx-Z28pOO3phMnr-qIhLuS9zJrp0MTCyIohI6EYxlJ3DpFXTNxneDn9OzNs7sZX_LwKKYA2E-7Mjr46dqZuprKzRn9amiPHusHo3dRWKpOzMSfhXpO) [topendsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHD7IG-bFCCNoc-2iRxXvo468klJLjiQmPdQCkRubtvT83i-Xbpg5XKxyLQB9Yc7qVwRuLjHIB37ywnZ8fdT3fM2ydpLggvdTGxAUVL1M0havCvEQpxiqcmS9LaBnOqWnMOWyy_ztdfTrVihPRb0chKtGeDHA-2EMQlW9ge) [sportingnews](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFAkEqJsPiiUSSUWhHm2Qtc30qed2JljSZ9Nu4JrcJ--CkX_Rif1AO5L0Kxl09j6yo8n5MS9NWdFsXi7KRIg5EJL0d0jm8YA-E4sllbJojNNQDwII8cb1A3b9b5RP3JoFTp2xEYQu914rrEFmRmjsFb44LU8bgGFJijrBG237B67YqLXiQThCPZjP-Gq3BKv3cTxZKseIXSRjaxosiM4LDpxDxZPAdpeIIpb3aiH7w_IyC8dWXpCyoHZYZfYe6EGNXfkmE=) [sportskeeda](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFIIs54t2O4RL3Y8cvfgbTs_CjcMeedsitpXc8PsaUZXOA7sohYdYTdTiN1-kLAhtXi2UZT4A-iIPZ6ufIpvuD_53Qtr4zqZnqZ6ox74EgyOPyjxs9k1qS_Gq1kR57IdjnMG4JbC7y9nVq2xZRNevSC-PJLSfCoLc36ahu6Xp6Fssl1Yw8LjgX2ranBbG72OvyijpJj1UygG-SVqr7h0y-DECQ=)\n",
" * Czechoslovakia (1976) [olympics](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHoAXOG7_3DUjYvRr_buN8IAL3xE5yQoPetZCb1KlcaMOgJEE5BeBoqQEVVkDZLDpwgTmFkPYeWS7i_D23Vd5bKzUTfc0HSLI481VbXjMD9ECeZRFZ17g3xAYLg5I0QU34RWLCRcV_zgphUsJZ0L5gXjpYz5gl8syuYAX3VkHCwh0x6Wqau4er_cZ56CoiA-3S_r2I=) [topendsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFvGr1KOv5qWkUO63kL5-QFEKn41IArXdrcLcMuaCc69bmwu_VsGzE7QI4scHdLjQxYxoFD3eg4ZflqzFcnNk7UJKM5cT8IR13LrrWodcNzotVidnczmVCFCd1-w10ixHS2rgykLdSr8UqFNJ88T2hZL-HL6YCLUUAXJjFP) [wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXG_0rXuWu0vsaIzCzaUG9Rw0L65I3o3RWhCp4gzXiHDZW3GaJXLntEQNi-O88mGf5LlE0tAkMNd_5VBNOkzIxAkbVsdkpPjwtzuY1sjv2gjtHLnvbIa8Y9jFbdS8kE3xqj95_TayzxNWpyr-XkwY9qLW4NI) [wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFXDSskdzRBS66rtxun9egUAz7spzUcDjq30yCoOdFr_SzKkrXBLgAPbzZKVQhx-Z28pOO3phMnr-qIhLuS9zJrp0MTCyIohI6EYxlJ3DpFXTNxneDn9OzNs7sZX_LwKKYA2E-7Mjr46dqZuprKzRn9amiPHusHo3dRWKpOzMSfhXpO) [topendsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHD7IG-bFCCNoc-2iRxXvo468klJLjiQmPdQCkRubtvT83i-Xbpg5XKxyLQB9Yc7qVwRuLjHIB37ywnZ8fdT3fM2ydpLggvdTGxAUVL1M0havCvEQpxiqcmS9LaBnOqWnMOWyy_ztdfTrVihPRb0chKtGeDHA-2EMQlW9ge) [sportingnews](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFAkEqJsPiiUSSUWhHm2Qtc30qed2JljSZ9Nu4JrcJ--CkX_Rif1AO5L0Kxl09j6yo8n5MS9NWdFsXi7KRIg5EJL0d0jm8YA-E4sllbJojNNQDwII8cb1A3b9b5RP3JoFTp2xEYQu914rrEFmRmjsFb44LU8bgGFJijrBG237B67YqLXiQThCPZjP-Gq3BKv3cTxZKseIXSRjaxosiM4LDpxDxZPAdpeIIpb3aiH7w_IyC8dWXpCyoHZYZfYe6EGNXfkmE=) [sportskeeda](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFIIs54t2O4RL3Y8cvfgbTs_CjcMeedsitpXc8PsaUZXOA7sohYdYTdTiN1-kLAhtXi2UZT4A-iIPZ6ufIpvuD_53Qtr4zqZnqZ6ox74EgyOPyjxs9k1qS_Gq1kR57IdjnMG4JbC7y9nVq2xZRNevSC-PJLSfCoLc36ahu6Xp6Fssl1Yw8LjgX2ranBbG72OvyijpJj1UygG-SVqr7h0y-DECQ=)\n",
" * Netherlands (1988) [olympics](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHoAXOG7_3DUjYvRr_buN8IAL3xE5yQoPetZCb1KlcaMOgJEE5BeBoqQEVVkDZLDpwgTmFkPYeWS7i_D23Vd5bKzUTfc0HSLI481VbXjMD9ECeZRFZ17g3xAYLg5I0QU34RWLCRcV_zgphUsJZ0L5gXjpYz5gl8syuYAX3VkHCwh0x6Wqau4er_cZ56CoiA-3S_r2I=) [topendsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFvGr1KOv5qWkUO63kL5-QFEKn41IArXdrcLcMuaCc69bmwu_VsGzE7QI4scHdLjQxYxoFD3eg4ZflqzFcnNk7UJKM5cT8IR13LrrWodcNzotVidnczmVCFCd1-w10ixHS2rgykLdSr8UqFNJ88T2hZL-HL6YCLUUAXJjFP) [wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXG_0rXuWu0vsaIzCzaUG9Rw0L65I3o3RWhCp4gzXiHDZW3GaJXLntEQNi-O88mGf5LlE0tAkMNd_5VBNOkzIxAkbVsdkpPjwtzuY1sjv2gjtHLnvbIa8Y9jFbdS8kE3xqj95_TayzxNWpyr-XkwY9qLW4NI) [wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFXDSskdzRBS66rtxun9egUAz7spzUcDjq30yCoOdFr_SzKkrXBLgAPbzZKVQhx-Z28pOO3phMnr-qIhLuS9zJrp0MTCyIohI6EYxlJ3DpFXTNxneDn9OzNs7sZX_LwKKYA2E-7Mjr46dqZuprKzRn9amiPHusHo3dRWKpOzMSfhXpO) [topendsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHD7IG-bFCCNoc-2iRxXvo468klJLjiQmPdQCkRubtvT83i-Xbpg5XKxyLQB9Yc7qVwRuLjHIB37ywnZ8fdT3fM2ydpLggvdTGxAUVL1M0havCvEQpxiqcmS9LaBnOqWnMOWyy_ztdfTrVihPRb0chKtGeDHA-2EMQlW9ge) [sportingnews](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFAkEqJsPiiUSSUWhHm2Qtc30qed2JljSZ9Nu4JrcJ--CkX_Rif1AO5L0Kxl09j6yo8n5MS9NWdFsXi7KRIg5EJL0d0jm8YA-E4sllbJojNNQDwII8cb1A3b9b5RP3JoFTp2xEYQu914rrEFmRmjsFb44LU8bgGFJijrBG237B67YqLXiQThCPZjP-Gq3BKv3cTxZKseIXSRjaxosiM4LDpxDxZPAdpeIIpb3aiH7w_IyC8dWXpCyoHZYZfYe6EGNXfkmE=) [sportskeeda](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFIIs54t2O4RL3Y8cvfgbTs_CjcMeedsitpXc8PsaUZXOA7sohYdYTdTiN1-kLAhtXi2UZT4A-iIPZ6ufIpvuD_53Qtr4zqZnqZ6ox74EgyOPyjxs9k1qS_Gq1kR57IdjnMG4JbC7y9nVq2xZRNevSC-PJLSfCoLc36ahu6Xp6Fssl1Yw8LjgX2ranBbG72OvyijpJj1UygG-SVqr7h0y-DECQ=)\n",
" * Denmark (1992) [sportsadda](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEYq2a0benYn0vF2WrfvmqEsgwriQ08aVcDdpS1MUjBxlzaV_scV0ldVeUpwqcgVLCfxgX3oVmbUxbkFPzeHbknsAbxLFk4Iyvtxgacx54AZBnL1szGQ9cQQGOOT8f-zGZhzKWEhAIOYTsz89uAr55R546MlC31OFXiU7AGhMgLi0Ekk6wQvPJVTWs_TiaG4MHoHo0obaRhJK1iPYaAxqHKD2Zf5rTr2jmdPBPd9w==) [topendsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFvGr1KOv5qWkUO63kL5-QFEKn41IArXdrcLcMuaCc69bmwu_VsGzE7QI4scHdLjQxYxoFD3eg4ZflqzFcnNk7UJKM5cT8IR13LrrWodcNzotVidnczmVCFCd1-w10ixHS2rgykLdSr8UqFNJ88T2hZL-HL6YCLUUAXJjFP) [wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXG_0rXuWu0vsaIzCzaUG9Rw0L65I3o3RWhCp4gzXiHDZW3GaJXLntEQNi-O88mGf5LlE0tAkMNd_5VBNOkzIxAkbVsdkpPjwtzuY1sjv2gjtHLnvbIa8Y9jFbdS8kE3xqj95_TayzxNWpyr-XkwY9qLW4NI) [wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFXDSskdzRBS66rtxun9egUAz7spzUcDjq30yCoOdFr_SzKkrXBLgAPbzZKVQhx-Z28pOO3phMnr-qIhLuS9zJrp0MTCyIohI6EYxlJ3DpFXTNxneDn9OzNs7sZX_LwKKYA2E-7Mjr46dqZuprKzRn9amiPHusHo3dRWKpOzMSfhXpO) [topendsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHD7IG-bFCCNoc-2iRxXvo468klJLjiQmPdQCkRubtvT83i-Xbpg5XKxyLQB9Yc7qVwRuLjHIB37ywnZ8fdT3fM2ydpLggvdTGxAUVL1M0havCvEQpxiqcmS9LaBnOqWnMOWyy_ztdfTrVihPRb0chKtGeDHA-2EMQlW9ge) [sportingnews](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFAkEqJsPiiUSSUWhHm2Qtc30qed2JljSZ9Nu4JrcJ--CkX_Rif1AO5L0Kxl09j6yo8n5MS9NWdFsXi7KRIg5EJL0d0jm8YA-E4sllbJojNNQDwII8cb1A3b9b5RP3JoFTp2xEYQu914rrEFmRmjsFb44LU8bgGFJijrBG237B67YqLXiQThCPZjP-Gq3BKv3cTxZKseIXSRjaxosiM4LDpxDxZPAdpeIIpb3aiH7w_IyC8dWXpCyoHZYZfYe6EGNXfkmE=) [sportskeeda](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFIIs54t2O4RL3Y8cvfgbTs_CjcMeedsitpXc8PsaUZXOA7sohYdYTdTiN1-kLAhtXi2UZT4A-iIPZ6ufIpvuD_53Qtr4zqZnqZ6ox74EgyOPyjxs9k1qS_Gq1kR57IdjnMG4JbC7y9nVq2xZRNevSC-PJLSfCoLc36ahu6Xp6Fssl1Yw8LjgX2ranBbG72OvyijpJj1UygG-SVqr7h0y-DECQ=)\n",
" * Greece (2004) [olympics](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHoAXOG7_3DUjYvRr_buN8IAL3xE5yQoPetZCb1KlcaMOgJEE5BeBoqQEVVkDZLDpwgTmFkPYeWS7i_D23Vd5bKzUTfc0HSLI481VbXjMD9ECeZRFZ17g3xAYLg5I0QU34RWLCRcV_zgphUsJZ0L5gXjpYz5gl8syuYAX3VkHCwh0x6Wqau4er_cZ56CoiA-3S_r2I=) [topendsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFvGr1KOv5qWkUO63kL5-QFEKn41IArXdrcLcMuaCc69bmwu_VsGzE7QI4scHdLjQxYxoFD3eg4ZflqzFcnNk7UJKM5cT8IR13LrrWodcNzotVidnczmVCFCd1-w10ixHS2rgykLdSr8UqFNJ88T2hZL-HL6YCLUUAXJjFP) [wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXG_0rXuWu0vsaIzCzaUG9Rw0L65I3o3RWhCp4gzXiHDZW3GaJXLntEQNi-O88mGf5LlE0tAkMNd_5VBNOkzIxAkbVsdkpPjwtzuY1sjv2gjtHLnvbIa8Y9jFbdS8kE3xqj95_TayzxNWpyr-XkwY9qLW4NI) [wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFXDSskdzRBS66rtxun9egUAz7spzUcDjq30yCoOdFr_SzKkrXBLgAPbzZKVQhx-Z28pOO3phMnr-qIhLuS9zJrp0MTCyIohI6EYxlJ3DpFXTNxneDn9OzNs7sZX_LwKKYA2E-7Mjr46dqZuprKzRn9amiPHusHo3dRWKpOzMSfhXpO) [topendsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHD7IG-bFCCNoc-2iRxXvo468klJLjiQmPdQCkRubtvT83i-Xbpg5XKxyLQB9Yc7qVwRuLjHIB37ywnZ8fdT3fM2ydpLggvdTGxAUVL1M0havCvEQpxiqcmS9LaBnOqWnMOWyy_ztdfTrVihPRb0chKtGeDHA-2EMQlW9ge) [sportingnews](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFAkEqJsPiiUSSUWhHm2Qtc30qed2JljSZ9Nu4JrcJ--CkX_Rif1AO5L0Kxl09j6yo8n5MS9NWdFsXi7KRIg5EJL0d0jm8YA-E4sllbJojNNQDwII8cb1A3b9b5RP3JoFTp2xEYQu914rrEFmRmjsFb44LU8bgGFJijrBG237B67YqLXiQThCPZjP-Gq3BKv3cTxZKseIXSRjaxosiM4LDpxDxZPAdpeIIpb3aiH7w_IyC8dWXpCyoHZYZfYe6EGNXfkmE=) [sportskeeda](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFIIs54t2O4RL3Y8cvfgbTs_CjcMeedsitpXc8PsaUZXOA7sohYdYTdTiN1-kLAhtXi2UZT4A-iIPZ6ufIpvuD_53Qtr4zqZnqZ6ox74EgyOPyjxs9k1qS_Gq1kR57IdjnMG4JbC7y9nVq2xZRNevSC-PJLSfCoLc36ahu6Xp6Fssl1Yw8LjgX2ranBbG72OvyijpJj1UygG-SVqr7h0y-DECQ=)\n",
" * Portugal (2016) [sportsadda](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXEYq2a0benYn0vF2WrfvmqEsgwriQ08aVcDdpS1MUjBxlzaV_scV0ldVeUpwqcgVLCfxgX3oVmbUxbkFPzeHbknsAbxLFk4Iyvtxgacx54AZBnL1szGQ9cQQGOOT8f-zGZhzKWEhAIOYTsz89uAr55R546MlC31OFXiU7AGhMgLi0Ekk6wQvPJVTWs_TiaG4MHoHo0obaRhJK1iPYaAxqHKD2Zf5rTr2jmdPBPd9w==) [topendsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFvGr1KOv5qWkUO63kL5-QFEKn41IArXdrcLcMuaCc69bmwu_VsGzE7QI4scHdLjQxYxoFD3eg4ZflqzFcnNk7UJKM5cT8IR13LrrWodcNzotVidnczmVCFCd1-w10ixHS2rgykLdSr8UqFNJ88T2hZL-HL6YCLUUAXJjFP) [wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXG_0rXuWu0vsaIzCzaUG9Rw0L65I3o3RWhCp4gzXiHDZW3GaJXLntEQNi-O88mGf5LlE0tAkMNd_5VBNOkzIxAkbVsdkpPjwtzuY1sjv2gjtHLnvbIa8Y9jFbdS8kE3xqj95_TayzxNWpyr-XkwY9qLW4NI) [wikipedia](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFXDSskdzRBS66rtxun9egUAz7spzUcDjq30yCoOdFr_SzKkrXBLgAPbzZKVQhx-Z28pOO3phMnr-qIhLuS9zJrp0MTCyIohI6EYxlJ3DpFXTNxneDn9OzNs7sZX_LwKKYA2E-7Mjr46dqZuprKzRn9amiPHusHo3dRWKpOzMSfhXpO) [topendsports](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXHD7IG-bFCCNoc-2iRxXvo468klJLjiQmPdQCkRubtvT83i-Xbpg5XKxyLQB9Yc7qVwRuLjHIB37ywnZ8fdT3fM2ydpLggvdTGxAUVL1M0havCvEQpxiqcmS9LaBnOqWnMOWyy_ztdfTrVihPRb0chKtGeDHA-2EMQlW9ge) [sportingnews](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFAkEqJsPiiUSSUWhHm2Qtc30qed2JljSZ9Nu4JrcJ--CkX_Rif1AO5L0Kxl09j6yo8n5MS9NWdFsXi7KRIg5EJL0d0jm8YA-E4sllbJojNNQDwII8cb1A3b9b5RP3JoFTp2xEYQu914rrEFmRmjsFb44LU8bgGFJijrBG237B67YqLXiQThCPZjP-Gq3BKv3cTxZKseIXSRjaxosiM4LDpxDxZPAdpeIIpb3aiH7w_IyC8dWXpCyoHZYZfYe6EGNXfkmE=) [sportskeeda](https://vertexaisearch.cloud.google.com/grounding-api-redirect/AbF9wXFIIs54t2O4RL3Y8cvfgbTs_CjcMeedsitpXc8PsaUZXOA7sohYdYTdTiN1-kLAhtXi2UZT4A-iIPZ6ufIpvuD_53Qtr4zqZnqZ6ox74EgyOPyjxs9k1qS_Gq1kR57IdjnMG4JbC7y9nVq2xZRNevSC-PJLSfCoLc36ahu6Xp6Fssl1Yw8LjgX2ranBbG72OvyijpJj1UygG-SVqr7h0y-DECQ=)\n",
"\n",
"Therefore, while Spain has the most titles, the top 5 ranking positions are held by Spain (1st), Germany (2nd), Italy and France (tied 3rd), and the six nations tied for 5th place."
],
"text/plain": [
""
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Markdown(state[\"messages\"][-1].content)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
================================================
FILE: docker-compose.yml
================================================
volumes:
langgraph-data:
driver: local
services:
langgraph-redis:
image: docker.io/redis:6
healthcheck:
test: redis-cli ping
interval: 5s
timeout: 1s
retries: 5
langgraph-postgres:
image: docker.io/postgres:16
ports:
- "5433:5432"
environment:
POSTGRES_DB: postgres
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
volumes:
- langgraph-data:/var/lib/postgresql/data
healthcheck:
test: pg_isready -U postgres
start_period: 10s
timeout: 1s
retries: 5
interval: 5s
langgraph-api:
image: gemini-fullstack-langgraph
ports:
- "8123:8000"
depends_on:
langgraph-redis:
condition: service_healthy
langgraph-postgres:
condition: service_healthy
environment:
GEMINI_API_KEY: ${GEMINI_API_KEY}
LANGSMITH_API_KEY: ${LANGSMITH_API_KEY}
REDIS_URI: redis://langgraph-redis:6379
POSTGRES_URI: postgres://postgres:postgres@langgraph-postgres:5432/postgres?sslmode=disable
================================================
FILE: docs/document-generation-flow-ZH.md
================================================
# 文档生成流程:从查询到综合研究报告
## 增强的Agent工作流

*增强的agent工作流包含智能内容增强和双层评估系统,确保研究质量的全面性。*
## 目录
1. [概述](#概述)
2. [架构与设计原则](#架构与设计原则)
3. [状态管理](#状态管理)
4. [节点逐一分析](#节点逐一分析)
5. [数据流与转换](#数据流与转换)
6. [提示工程与LLM集成](#提示工程与LLM集成)
7. [错误处理与容错性](#错误处理与容错性)
8. [批量生成机制](#批量生成机制)
9. [内容质量保证](#内容质量保证)
10. [性能优化](#性能优化)
11. [系统修复与改进](#系统修复与改进)
12. [报告级别内容增强](#报告级别内容增强)
13. [引用系统与URL管理](#引用系统与URL管理)
14. [未来增强功能](#未来增强功能)
## 概述
基于LangGraph的研究代理是一个复杂的多步骤系统,旨在将简单的用户查询转换为全面、结构良好的研究报告。本文档深入分析了系统如何协调多个AI代理、管理复杂的状态转换,并确保生成详细、事实准确的文档。
### 核心目标
该代理的主要目标是通过以下方式解决传统单提示AI交互的局限性:
1. **将复杂的研究任务分解**为可管理的、专注的子任务
2. **进行迭代研究**,包含反思和改进循环
3. **在多个研究阶段保持上下文一致性**
4. **生成综合报告**,充分利用现代LLM的完整上下文窗口
5. **通过适当的引用和来源管理确保事实准确性**,使用真实、可访问的URL
6. **跟踪任务特定结果**,实现详细的内容综合
7. **实施双层内容增强**,确保全面的信息覆盖
### 系统架构理念
该代理遵循**多代理编排模式**,其中专门的节点处理研究管道的特定方面:
- **规划代理**:将用户查询分解为结构化的研究计划
- **查询生成器**:为特定研究目标创建有针对性的搜索查询
- **网络研究代理**:执行搜索,并将综合得出的发现与对应任务关联
- **反思代理**:评估研究完整性并识别差距
- **任务协调器**:管理多任务工作流和状态转换
- **报告级别增强器**:对识别的信息差距执行有针对性的深度增强
- **文档合成器**:使用批量处理和真实引用URL生成最终综合报告
## 架构与设计原则
### LangGraph状态管理
系统利用LangGraph的复杂状态管理功能,在多个执行阶段之间维持上下文。状态模式(schema)的设计支持全面的任务跟踪和结果组织:
```python
class OverallState(TypedDict):
messages: Annotated[list, add_messages]
user_query: str
plan: list # 存储planner_node生成的任务计划
current_task_pointer: int # 指向计划中的当前任务
executed_search_queries: Annotated[list, operator.add]
web_research_result: Annotated[list, operator.add]
sources_gathered: Annotated[list, operator.add]
initial_search_query_count: int
max_research_loops: int
research_loop_count: int
reasoning_model: str
# 多任务迭代支持
ledger: Annotated[List[LedgerEntry], operator.add]
global_summary_memory: Annotated[List[str], operator.add]
# 增强的结果跟踪
current_task_detailed_findings: Annotated[List[Dict[str, Any]], operator.add]
task_specific_results: Annotated[List[Dict[str, Any]], operator.add] # 新增:任务关联结果
final_report_markdown: Optional[str]
```
这种设计确保:
- **状态持久性**:关键信息在节点转换过程中得到维护
- **任务关联**:研究结果与其发起任务正确链接
- **并行执行**:多个研究查询可以同时处理
- **增量构建**:结果通过管道逐步累积
- **上下文保持**:早期发现为后续研究决策提供信息
### 关键状态管理修复
最近对系统的改进解决了关键的状态传播问题:
1. **任务ID传播**:向`WebSearchState`添加了`current_task_id`字段以确保正确的任务关联
2. **状态连续性**:向中间状态添加了`plan`和`current_task_pointer`字段
3. **结果组织**:为有组织的内容跟踪引入了`task_specific_results`
4. **错误恢复**:增强错误处理以在故障期间保持任务关联
### 模块化节点设计
图中的每个节点都有特定的目的,可以独立优化:
1. **单一职责**:每个节点都有一个主要功能
2. **清晰接口**:节点间标准化输入/输出契约
3. **错误隔离**:一个节点的故障不会级联传播到系统
4. **可配置行为**:运行时配置允许不同的执行策略
### 提示工程架构
系统采用复杂的提示工程策略,包括:
- **基于角色的指令**:每个代理都有明确定义的角色和行为准则
- **结构化输出要求**:JSON模式确保一致的数据交换
- **上下文感知提示**:提示根据当前研究状态自适应
- **示例驱动学习**:提示包含相关示例以指导LLM行为
## 状态管理
### 通过管道的状态演化
系统的状态在通过不同阶段时经历系统性转换:
#### 初始状态(用户查询输入)
```json
{
"messages": [{"role": "user", "content": "研究问题在此"}],
"user_query": "研究问题在此",
"plan": [],
"current_task_pointer": 0
}
```
#### 规划阶段状态
```json
{
"user_query": "研究问题在此",
"plan": [
{
"id": "task-1",
"description": "具体研究目标",
"info_needed": true,
"source_hint": "搜索关键词",
"status": "pending"
}
],
"current_task_pointer": 0
}
```
#### 研究执行状态
```json
{
"query_list": ["搜索查询1", "搜索查询2"],
"web_research_result": ["详细发现1", "详细发现2"],
"task_specific_results": [
{
"task_id": "task-1",
"content": "研究内容",
"sources": ["url1", "url2"],
"timestamp": "2024-01-01T12:00:00"
}
]
}
```
#### 最终报告状态
```json
{
"ledger": [
{
"task_id": "task-1",
"findings_summary": "关键发现摘要",
"detailed_snippets": ["详细内容"],
"citations_for_snippets": [{"snippet": "内容", "source": "url"}]
}
],
"final_report_markdown": "完整markdown报告"
}
```
### 状态验证与完整性
系统实施多种机制来确保状态完整性:
1. **类型安全**:TypedDict定义防止无效状态变更
2. **验证检查**:每个节点在处理前验证其必需输入
3. **回退机制**:默认值和错误恢复防止系统故障
4. **状态记录**:全面记录跟踪状态演化以进行调试
## 节点逐一分析
### 1. 规划节点
规划节点作为系统的战略智能,将非结构化用户查询转换为可执行的研究计划。
#### 功能概述
规划器采用先进的提示工程来:
- 分析用户查询的意图和范围
- 识别关键研究维度
- 生成结构化、顺序的研究任务
- 为每个任务提供搜索提示
#### 提示设计策略
规划提示的结构旨在最大化LLM推理能力:
```markdown
你是**PlannerAgent**。你的工作是将用户研究查询转换为可执行的研究计划。
=== 输出格式 ===
返回具有特定字段要求的单个JSON数组...
=== 要求 ===
1. 深入分析查询;识别核心目标
2. 如果清晰度不足,写出澄清问题
3. 产生具有逻辑顺序的多步骤计划
```
#### 关键实现细节
规划节点包含几个复杂功能:
**结构化输出验证**:使用LangChain的`with_structured_output`确保一致的JSON格式。
**错误恢复**:当结构化规划失败时实施回退逻辑:
```python
except Exception as e:
return {
"plan": [{"id": "task-1", "description": f"研究:{user_query}"}],
"current_task_pointer": 0
}
```
**查询分析**:优先考虑明确的用户查询,同时保持对消息历史的回退。
#### 规划质量因素
规划器的有效性取决于:
1. **适当范围分解**:将复杂主题分解为可管理的块
2. **逻辑任务排序**:确保早期任务为后期任务提供信息
3. **搜索优化**:为每个任务提供有效的搜索提示
4. **完整性**:涵盖研究主题的所有方面
### 2. 查询生成节点
查询生成节点将高级研究目标转换为特定的、有针对性的网络搜索查询,同时确保任务跟踪的正确状态传播。
#### 战略查询制作
节点采用多种策略生成有效查询:
1. **多样性最大化**:创建探索主题不同方面的查询
2. **特异性优化**:平衡广泛覆盖和有针对性的精确性
3. **时效性意识**:为时间敏感主题纳入当前日期信息
4. **源多样化**:生成可能返回不同类型源结果的查询
5. **任务上下文意识**:生成与当前研究任务特别对齐的查询
#### 增强状态管理
节点包含状态传播的关键修复:
```python
def generate_query(state: OverallState, config: RunnableConfig) -> QueryGenerationState:
# 任务感知查询生成
plan = state.get("plan")
pointer = state.get("current_task_pointer")
if plan and pointer is not None and pointer < len(plan):
research_topic = plan[pointer]["description"] # 使用当前任务描述
else:
research_topic = state.get("user_query") or get_research_topic(state["messages"])
# 为特定任务生成查询
result = structured_llm.invoke(formatted_prompt)
# 修复:确保状态传播
return {
"query_list": result.query,
"plan": state.get("plan", []), # 传播计划
"current_task_pointer": state.get("current_task_pointer", 0) # 传播指针
}
```
#### 查询生成的提示工程
查询生成提示包括:
```markdown
你是一个**QueryGenerator**,负责创建复杂的网络搜索查询。
=== 要求 ===
1. 每个查询应专注于一个特定方面
2. 查询应该多样化且互补
3. 最多允许{number_queries}个查询
4. 确保查询针对当前信息
5. 避免冗余或过于相似的查询
```
#### 任务到查询的转换
关键增强是`continue_to_web_research`函数,它正确关联查询与任务:
```python
def continue_to_web_research(state: QueryGenerationState):
# 从传播状态获取当前任务信息
plan = state.get("plan", [])
current_pointer = state.get("current_task_pointer", 0)
current_task_id = "unknown"
if plan and current_pointer < len(plan):
current_task_id = plan[current_pointer]["id"] # 提取实际任务ID
return [
Send("web_research", {
"search_query": search_query,
"id": int(idx),
"current_task_id": current_task_id # 修复:正确的任务关联
})
for idx, search_query in enumerate(state["query_list"])
]
```
#### 查询质量评估
生成的查询基于以下标准评估:
- **相关性**:与研究目标的直接联系
- **特异性**:有效搜索的适当详细级别
- **多样性**:覆盖不同方面或观点
- **可搜索性**:返回高质量结果的可能性
- **任务对齐**:与特定研究任务的对齐
### 3. 网络研究节点
网络研究节点代表系统与外部知识源的接口,利用Google的搜索API收集全面信息。
#### 多模态研究执行
研究过程包含:
1. **原生Google搜索集成**:使用Google的GenAI客户端和搜索工具
2. **接地元数据处理**:提取和处理源归属
3. **URL解析**:将搜索结果转换为可管理的引用格式
4. **内容合成**:将搜索结果合并为连贯的发现
#### 引用与源管理
系统实施复杂的源跟踪,保持真实URL:
```python
resolved_urls = resolve_urls(
response.candidates[0].grounding_metadata.grounding_chunks,
state["id"]
)
citations = get_citations(response, resolved_urls)
modified_text = insert_citation_markers(response.text, citations)
```
这确保:
- **归属准确性**:每个声明都链接到其来源
- **真实URL保持**:原始URL得以维护,便于用户访问(已修复)
- **引用集成**:源无缝嵌入研究文本中,提供可验证的链接
#### 错误处理与恢复
网络研究节点包含全面的错误处理,即使在故障期间也能保持任务关联:
```python
def web_research(state: WebSearchState, config: RunnableConfig) -> OverallState:
try:
# 主要研究执行逻辑
response = genai_client.models.generate_content(...)
# 处理成功响应
current_task_id = state.get("current_task_id", "unknown")
detailed_finding = {
"task_id": current_task_id, # 保持任务关联
"query_id": state["id"],
"content": modified_text,
"source": sources_gathered[0] if sources_gathered else None,
"timestamp": datetime.now().isoformat()
}
task_specific_result = {
"task_id": current_task_id, # 保持任务关联
"content": modified_text,
"sources": sources_gathered,
"timestamp": datetime.now().isoformat()
}
return {
"sources_gathered": sources_gathered,
"executed_search_queries": [state["search_query"]],
"web_research_result": [modified_text],
"current_task_detailed_findings": [detailed_finding],
"task_specific_results": [task_specific_result] # 增强结构
}
except Exception as e:
# 增强:错误处理保持任务上下文
current_task_id = state.get("current_task_id", "unknown")
error_message = f"网络研究期间出错:{str(e)}"
detailed_finding = {
"task_id": current_task_id, # 即使在错误中也保持任务关联
"query_id": state["id"],
"content": error_message,
"source": None,
"timestamp": datetime.now().isoformat()
}
task_specific_result = {
"task_id": current_task_id, # 即使在错误中也保持任务关联
"content": error_message,
"sources": [],
"timestamp": datetime.now().isoformat()
}
return {
"sources_gathered": [],
"executed_search_queries": [state["search_query"]],
"web_research_result": [error_message],
"current_task_detailed_findings": [detailed_finding],
"task_specific_results": [task_specific_result]
}
```
这种增强的错误处理确保:
- **任务关联保持**:即使在API故障期间,任务ID也得以维护
- **完整状态更新**:在错误场景中,所有必需的状态字段都得到填充
- **优雅降级**:系统继续运行,错误信息得到正确跟踪
- **调试支持**:错误消息包含完整的故障排除上下文
#### 任务特定结果组织
系统的关键增强是按任务组织研究结果:
```python
task_specific_result = {
"task_id": current_task_id,
"content": modified_text,
"sources": sources_gathered,
"timestamp": datetime.now().isoformat()
}
```
这种结构使得:
- **任务关联**:结果清楚地链接到其发起的研究任务
- **时间跟踪**:时间戳支持发现的时间组织
- **源保持**:每个结果的引用信息得到维护
### 4. 内容增强节点
内容增强节点代表研究管道中的关键创新,实现针对性深度内容抓取的智能决策制定,集成Firecrawl功能。
#### 智能增强决策
内容增强过程采用复杂分析来确定何时需要额外的深度:
```python
def content_enhancement_analysis(state: OverallState, config: RunnableConfig) -> dict:
# 分析当前研究上下文
plan = state.get("plan", [])
current_pointer = state.get("current_task_pointer", 0)
# 确定研究主题
if plan and current_pointer < len(plan):
research_topic = plan[current_pointer]["description"]
else:
research_topic = state.get("user_query") or get_research_topic(state["messages"])
# 获取当前发现和来源
current_findings = state.get("web_research_result", [])
grounding_sources = extract_grounding_sources(state)
# 使用智能决策器
decision = get_content_enhancement_decision_maker().analyze_enhancement_need(
research_topic=research_topic,
current_findings=current_findings,
grounding_sources=grounding_sources,
config=config
)
```
#### 增强决策标准
系统评估多个因素来确定增强需求:
1. **内容深度分析**:评估当前发现是否提供足够的细节
2. **源质量评估**:确定是否有更高质量的源可用
3. **信息差距检测**:识别需要深入调查的特定领域
4. **资源效率**:平衡增强效益与计算成本
#### Firecrawl集成策略
当认为需要增强时,系统:
1. **优先排序URL**:选择最有前景的深度抓取来源
2. **执行目标抓取**:使用Firecrawl进行全面内容提取
3. **质量评估**:评估增强内容的有效性
4. **内容集成**:将增强发现与现有研究合并
#### 增强类型
系统支持多种增强类别:
- **技术深度探索**:详细的技术规范和实施细节
- **市场情报**:当前市场数据和竞争分析
- **案例研究**:真实世界的实施示例和结果
- **监管信息**:政策、标准和合规要求
### 5. 增强研究评估节点
增强研究评估节点代表传统反思过程的演进,将内容增强结果纳入研究充分性评估。
#### 智能评估集成
评估过程考虑传统内容分析之外的多个因素:
```python
def evaluate_research_enhanced(state: OverallState, config: RunnableConfig) -> dict:
# 获取反思结果
reflection_is_sufficient = state.get("reflection_is_sufficient", False)
# 检查增强状态和有效性
enhancement_status = state.get("enhancement_status")
enhanced_sources_count = state.get("enhanced_sources_count", 0)
# 结合反思和增强结果的智能决策
is_sufficient = reflection_is_sufficient
# 增强效果评估
if not is_sufficient and enhancement_status == "completed" and enhanced_sources_count > 0:
enhancement_boost = min(enhanced_sources_count * 0.3, 0.8)
if enhancement_boost >= 0.6:
is_sufficient = True
```
#### 多维度评估
增强评估考虑:
1. **传统反思结果**:基于LLM的完整性评估
2. **增强有效性**:内容增强的成功和影响
3. **源多样性**:利用的信息来源广度
4. **内容深度**:研究目标的全面覆盖
5. **质量指标**:整体研究质量指标
#### 动态决策制定
评估节点实施复杂的路由逻辑:
- **继续研究**:尽管增强但仍有重大差距时
- **完成任务**:研究目标得到充分解决时
- **自适应阈值**:基于增强成功的动态调整
## 报告级别内容增强
报告级别内容增强系统代表了一项重要创新,解决了在最终报告合成阶段识别的信息差距。这种双层增强方法确保了研究主题的全面覆盖。
### 架构概述
报告级别增强系统作为`finalize_answer`节点中的预分析步骤运行,使LLM可以在生成最终报告之前识别特定的信息差距。这种有针对性的方法专注于跨任务的信息需求和合成要求,因而不同于任务级增强。
#### 关键组件
1. **ReportLevelEnhancer**:主要增强协调类
2. **增强请求分析**:LLM驱动的差距识别
3. **有针对性的Firecrawl集成**:选择性深度网络抓取
4. **质量评估**:增强效果评估
### 实施策略
增强过程遵循结构化方法:
```python
def integrate_report_enhancement_into_finalize(
user_query: str,
research_plan: List[Dict],
aggregated_research_data: str,
available_sources: List[Dict[str, Any]],
config: RunnableConfig
) -> Tuple[str, List[ReportEnhancementResult]]:
enhancer = ReportLevelEnhancer()
# 1. 分析增强需求
enhancement_requests = enhancer.analyze_report_enhancement_needs(
user_query, research_plan, aggregated_research_data, config
)
if not enhancement_requests:
print("✅ 报告级别分析:当前信息充分")
return aggregated_research_data, []
# 2. 执行有针对性的增强
enhancement_results = enhancer.execute_targeted_enhancement(
enhancement_requests, available_sources
)
# 3. 合并增强内容
enhanced_data = aggregated_research_data
successful_enhancements = [r for r in enhancement_results if r.success]
if successful_enhancements:
for result in successful_enhancements:
enhanced_data += f"\n\n## 报告级别深度增强\n{result.enhanced_content}"
return enhanced_data, enhancement_results
```
### 增强类型与定位
系统识别几类增强需求:
1. **具体数据与统计**:量化数据差距
2. **实施案例与技术细节**:具体示例和技术规范
3. **市场数据与竞争分析**:当前市场信息
4. **政策、法规与标准**:监管框架覆盖
### 质量保证
增强结果经过质量评估:
```python
def _assess_enhancement_quality(content: str, request: ReportEnhancementRequest) -> str:
length = len(content)
target_keywords = request.target_information.lower().split()
keyword_matches = sum(1 for keyword in target_keywords if keyword in content.lower())
keyword_ratio = keyword_matches / len(target_keywords) if target_keywords else 0
if length > 2000 and keyword_ratio > 0.6:
return "excellent"
elif length > 1000 and keyword_ratio > 0.4:
return "good"
elif length > 500 and keyword_ratio > 0.2:
return "fair"
else:
return "poor"
```
## 引用系统与URL管理
### 关键URL管理修复
一项重大系统改进解决了引用URL系统的根本缺陷,该缺陷为用户生成不可访问的引用。
#### 问题识别
对生产结果的分析显示,引用包含以下格式的URL:
```
[source](https://vertexaisearch.cloud.google.com/id/x-x)
```
这些URL是Google Vertex AI Search的内部引用,最终用户无法访问,使整个引用系统对源验证无效。
#### 根本原因分析
问题源于`utils.py`中的`resolve_urls`函数:
```python
# 有问题的原始实现
def resolve_urls(urls_to_resolve: List[Any], id: int) -> Dict[str, str]:
prefix = f"https://vertexaisearch.cloud.google.com/id/"
urls = [site.web.uri for site in urls_to_resolve]
for idx, url in enumerate(urls):
if url not in resolved_map:
resolved_map[url] = f"{prefix}{id}-{idx}" # 创建假URL!
```
这个函数错误地将真实的、可访问的URL转换为假的内部引用。
#### 解决方案实施
修复保持了原始URL,同时维护去重功能:
```python
# 修复后的实现
def resolve_urls(urls_to_resolve: List[Any], id: int) -> Dict[str, str]:
"""
创建保持原始URL而不是用假内部ID替换的映射。
这确保引用指向真实、可访问的网络源。
"""
urls = [site.web.uri for site in urls_to_resolve]
resolved_map = {}
for idx, url in enumerate(urls):
if url not in resolved_map:
resolved_map[url] = url # 保持原始URL!
return resolved_map
```
#### 影响评估
修复提供了几个关键改进:
1. **用户可访问性**:引用现在指向真实的、可点击的URL
2. **源验证**:用户可以通过访问原始源来验证信息
3. **专业标准**:报告符合学术和专业引用要求
4. **系统完整性**:没有源跟踪功能损失
#### 增强引用处理
引用系统现在通过整个管道正确处理URL保持:
```python
def convert_citations_to_readable(content, source_mapping):
def replace_citation(match):
citation_id = match.group(1)
if citation_id in source_mapping:
source_info = source_mapping[citation_id]
domain = source_info.get('domain', 'Unknown Source')
url = source_info.get('value', '')
label = source_info.get('label', domain)
# 使用真实、可访问的URL格式化
if url and url.startswith('http') and 'vertexaisearch.cloud.google.com' not in url:
return f"[Source: {label} ({url})]"
else:
return f"[Source: {label}]"
return f"[Source: {citation_id}]"
# 转换引用同时保持真实URL
content = re.sub(r'\[vertexaisearch\.cloud\.google\.com/id/([^\]]+)\]',
replace_citation, content)
return content
```
### 引用质量验证
系统现在包含确保引用质量的验证:
1. **URL可访问性**:验证URL不是内部系统引用
2. **源归属**:内容与原始源的正确链接
3. **格式一致性**:报告中标准化的引用格式
4. **完整性**:所有事实声明包含适当的源归属
## 系统修复与改进
### 最近的关键修复(最新更新)
#### 引用URL管理修复(2024年12月)
**问题**:生成报告中的所有引用都指向不可访问的`vertexaisearch.cloud.google.com`内部URL,而不是真实的源URL。
**解决方案**:修改`utils.py`中的`resolve_urls`函数以保持原始URL:
```python
# 修复为保持真实URL而不是创建假URL
resolved_map[url] = url # 保持原始URL
```
**影响**:引用现在提供真实、可访问的URL进行源验证。
#### 报告级别增强系统(2024年12月)
**新增**:实施双层内容增强系统,包括:
- 信息差距识别的预分析
- 特定缺失数据的有针对性Firecrawl集成
- 质量评估和智能增强决策
**影响**:综合报告能够针对已识别的信息差距进行有针对性的深度增强。
### 监控与维护
持续监控包括:
- 生产日志中的任务ID传播验证
- 账本条目完整性跟踪
- 保持任务上下文的错误率监控
- 通过内容指标进行报告质量评估
- 引用URL可访问性验证(新增)
- 报告级别增强效果跟踪(新增)
## 未来增强功能
### 计划改进
#### 高级引用管理
- **学术格式支持**:APA、MLA和其他引用样式
- **源质量评估**:源可信度的自动评估
- **引用去重**:智能处理重复源
#### 内容增强功能
- **视觉内容集成**:报告中的图表、图形和示意图
- **多媒体支持**:视频和音频源的集成
- **交互元素**:可展开部分和动态内容
#### 质量保证增强
- **自动事实检查**:对可靠源的交叉引用验证
- **偏见检测**:内容偏见的识别和缓解
- **完整性评分**:研究彻底性的量化评估
### 可扩展性考虑
#### 系统架构
- **微服务分解**:将系统分解为独立可扩展的组件
- **数据库集成**:大规模研究项目的持久存储
- **负载均衡**:跨多个实例的处理分布
#### 性能优化
- **缓存层**:多级缓存以改善响应时间
- **异步处理**:非阻塞执行以更好地利用资源
- **流处理**:大型文档的实时结果流
### 集成可能性
#### 外部系统集成
- **学术数据库**:与学术研究平台的直接集成
- **企业系统**:连接到组织知识库
- **协作平台**:多用户研究和编辑功能
#### API和开发者体验
- **RESTful API**:外部集成的标准化接口
- **SDK开发**:易于集成的特定语言库
- **Webhook支持**:与外部系统的事件驱动集成
## 结论
基于LangGraph的研究代理代表了自动化研究和文档生成的重大进步。通过协调多个专门的AI代理、实施复杂的状态管理和利用先进的提示工程技术,系统将简单的用户查询转换为全面、研究充分的文档。
关键创新包括:
1. **多代理编排**:专门代理处理研究管道的不同方面
2. **迭代研究过程**:反思和改进循环确保全面覆盖
3. **批量生成机制**:大型LLM上下文窗口的高效利用
4. **状态管理**:具有正确任务关联的研究进度和发现的复杂跟踪
5. **质量保证**:多层验证和错误处理
6. **系统恢复力**:在故障条件下保持数据完整性的强大错误处理
### 最近改进
系统经历了重大改进,解决了任务跟踪和内容组织的关键问题:
- **增强状态管理**:通过所有管道阶段的完整任务上下文传播
- **改进数据组织**:通过任务特定结果跟踪实现更好的内容合成
- **强大错误处理**:保持任务关联的优雅降级
- **全面测试**:确保系统可靠性的验证框架
系统的设计既优先考虑质量又考虑可扩展性,使其适用于从学术工作到商业智能的广泛研究应用。模块化架构支持持续改进和特定用例的定制。
随着AI能力的持续发展,像这样的系统将变得越来越重要,用于增强人类研究能力并使高质量研究输出的访问民主化。这里建立的基础,具有其强大的状态管理和任务跟踪能力,为未来的增强和专门应用提供了坚实的平台。
这里提供的综合文档既作为技术参考,也作为类似系统的设计指南。通过理解本文档中概述的原则和实施细节,开发人员可以在此基础上构建更复杂的研究和文档生成系统。
通过对提示工程、状态管理、错误处理和质量保证的精心关注,该系统展示了现代AI技术如何被编排产生在全面性和质量上与人类生成的研究报告相媲美的输出。自动化研究的未来在于结合大型语言模型的推理能力与本实施中展示的系统方法和质量控制的系统。
================================================
FILE: docs/document-generation-flow.md
================================================
# Document Generation Flow: From Query to Comprehensive Research Report
## Enhanced Agent Workflow

*The enhanced agent workflow includes intelligent content enhancement and dual-layer evaluation systems for comprehensive research quality.*
## Table of Contents
1. [Overview](#overview)
2. [Architecture and Design Principles](#architecture-and-design-principles)
3. [State Management](#state-management)
4. [Node-by-Node Analysis](#node-by-node-analysis)
5. [Data Flow and Transformations](#data-flow-and-transformations)
6. [Prompt Engineering and LLM Integration](#prompt-engineering-and-llm-integration)
7. [Error Handling and Resilience](#error-handling-and-resilience)
8. [Batch Generation Mechanism](#batch-generation-mechanism)
9. [Content Quality Assurance](#content-quality-assurance)
10. [Performance Optimization](#performance-optimization)
11. [System Fixes and Improvements](#system-fixes-and-improvements)
12. [Report-Level Content Enhancement](#report-level-content-enhancement)
13. [Citation System and URL Management](#citation-system-and-url-management)
14. [Future Enhancements](#future-enhancements)
## Overview
The LangGraph-based research agent represents a sophisticated multi-step system designed to transform simple user queries into comprehensive, well-structured research reports. This document provides an in-depth analysis of how the system orchestrates multiple AI agents, manages complex state transitions, and ensures the generation of detailed, factually accurate documents.
### Core Objectives
The primary goal of this agent is to address the limitations of traditional single-prompt AI interactions by:
1. **Breaking down complex research tasks** into manageable, focused subtasks
2. **Conducting iterative research** with reflection and refinement cycles
3. **Maintaining context coherence** across multiple research phases
4. **Generating comprehensive reports** that leverage the full context window of modern LLMs
5. **Ensuring factual accuracy** through proper citation and source management with real, accessible URLs
6. **Tracking task-specific results** for detailed content synthesis
7. **Implementing dual-layer content enhancement** for comprehensive information coverage
### System Architecture Philosophy
The agent follows a **multi-agent orchestration pattern** where specialized nodes handle specific aspects of the research pipeline:
- **Planning Agent**: Decomposes user queries into structured research plans
- **Query Generator**: Creates targeted search queries for specific research objectives
- **Web Research Agent**: Executes searches and synthesizes findings with task association
- **Reflection Agent**: Evaluates research completeness and identifies gaps
- **Task Coordinator**: Manages multi-task workflows and state transitions
- **Report-Level Enhancer**: Performs targeted deep enhancement for identified information gaps
- **Document Synthesizer**: Generates final comprehensive reports using batch processing with real citation URLs
## Architecture and Design Principles
### LangGraph State Management
The system utilizes LangGraph's sophisticated state management capabilities to maintain context across multiple execution phases. The state schema is designed to support comprehensive task tracking and result organization:
```python
class OverallState(TypedDict):
messages: Annotated[list, add_messages]
user_query: str
plan: list # Store task plan generated by planner_node
current_task_pointer: int # Point to current task in plan
executed_search_queries: Annotated[list, operator.add]
web_research_result: Annotated[list, operator.add]
sources_gathered: Annotated[list, operator.add]
initial_search_query_count: int
max_research_loops: int
research_loop_count: int
reasoning_model: str
# Multi-task iteration support
ledger: Annotated[List[LedgerEntry], operator.add]
global_summary_memory: Annotated[List[str], operator.add]
# Enhanced result tracking
current_task_detailed_findings: Annotated[List[Dict[str, Any]], operator.add]
task_specific_results: Annotated[List[Dict[str, Any]], operator.add] # NEW: Task-associated results
final_report_markdown: Optional[str]
class QueryGenerationState(TypedDict):
query_list: list[Query]
# FIXED: Added state propagation fields
plan: list
current_task_pointer: int
class ReflectionState(TypedDict):
is_sufficient: bool
knowledge_gap: str
follow_up_queries: Annotated[list, operator.add]
research_loop_count: int
number_of_ran_queries: int
# FIXED: Added state propagation fields
plan: list
current_task_pointer: int
class WebSearchState(TypedDict):
search_query: str
id: str
current_task_id: str # FIXED: Added for task association
```
This design ensures that:
- **State persistence**: Critical information is maintained across node transitions
- **Task association**: Research results are properly linked to their originating tasks
- **Parallel execution**: Multiple research queries can be processed simultaneously
- **Incremental building**: Results accumulate progressively through the pipeline
- **Context preservation**: Earlier findings inform later research decisions
### Key State Management Fixes
Recent improvements to the system addressed critical state propagation issues:
1. **Task ID Propagation**: Added `current_task_id` field to `WebSearchState` to ensure proper task association
2. **State Continuity**: Added `plan` and `current_task_pointer` fields to intermediate states
3. **Result Organization**: Introduced `task_specific_results` for organized content tracking
4. **Error Resilience**: Enhanced error handling to preserve task associations even during failures
### Modular Node Design
Each node in the graph serves a specific purpose and can be independently optimized:
1. **Single Responsibility**: Each node has one primary function
2. **Clear Interfaces**: Standardized input/output contracts between nodes
3. **Error Isolation**: Failures in one node don't cascade through the system
4. **Configurable Behavior**: Runtime configuration allows for different execution strategies
### Prompt Engineering Architecture
The system employs a sophisticated prompt engineering strategy that includes:
- **Role-based Instructions**: Each agent has a clearly defined role and behavioral guidelines
- **Structured Output Requirements**: JSON schemas ensure consistent data exchange
- **Context-aware Prompting**: Prompts adapt based on current research state
- **Example-driven Learning**: Prompts include relevant examples to guide LLM behavior
## State Management
### State Evolution Through the Pipeline
The system's state undergoes systematic transformations as it progresses through different phases:
#### Initial State (User Query Input)
```json
{
"messages": [{"role": "user", "content": "Research question here"}],
"user_query": "Research question here",
"plan": [],
"current_task_pointer": 0
}
```
#### Planning Phase State
```json
{
"user_query": "Research question here",
"plan": [
{
"id": "task-1",
"description": "Specific research objective",
"info_needed": true,
"source_hint": "Search keywords",
"status": "pending"
}
],
"current_task_pointer": 0
}
```
#### Research Execution State
```json
{
"query_list": ["search query 1", "search query 2"],
"web_research_result": ["detailed finding 1", "detailed finding 2"],
"task_specific_results": [
{
"task_id": "task-1",
"content": "Research content",
"sources": ["url1", "url2"],
"timestamp": "2024-01-01T12:00:00"
}
]
}
```
#### Final Report State
```json
{
"ledger": [
{
"task_id": "task-1",
"findings_summary": "Key findings summary",
"detailed_snippets": ["detailed content"],
"citations_for_snippets": [{"snippet": "content", "source": "url"}]
}
],
"final_report_markdown": "Complete markdown report"
}
```
### State Validation and Integrity
The system implements several mechanisms to ensure state integrity:
1. **Type Safety**: TypedDict definitions prevent invalid state mutations
2. **Validation Checks**: Each node validates its required inputs before processing
3. **Fallback Mechanisms**: Default values and error recovery prevent system failures
4. **State Logging**: Comprehensive logging tracks state evolution for debugging
## Node-by-Node Analysis
### 1. Planner Node
The planner node serves as the system's strategic intelligence, transforming unstructured user queries into actionable research plans.
#### Functionality Overview
The planner employs advanced prompt engineering to:
- Analyze user query intent and scope
- Identify key research dimensions
- Generate structured, sequential research tasks
- Provide search hints for each task
#### Prompt Design Strategy
The planning prompt is structured to maximize LLM reasoning capabilities:
```markdown
You are **PlannerAgent**. Your job is to transform a user research query into an executable research plan.
=== OUTPUT FORMAT ===
Return a single JSON array with specific field requirements...
=== REQUIREMENTS ===
1. Deeply analyze the query; identify core objectives
2. If clarity is insufficient, write clarifying questions
3. Produce a multi-step plan with logical sequencing
```
#### Critical Implementation Details
The planner node includes several sophisticated features:
**Structured Output Validation**: Uses LangChain's `with_structured_output` to ensure consistent JSON formatting.
**Error Recovery**: Implements fallback logic when structured planning fails:
```python
except Exception as e:
return {
"plan": [{"id": "task-1", "description": f"Research: {user_query}"}],
"current_task_pointer": 0
}
```
**Query Analysis**: Prioritizes explicit user queries while maintaining fallback to message history.
#### Planning Quality Factors
The planner's effectiveness depends on:
1. **Scope-Appropriate Decomposition**: Breaking complex topics into manageable chunks
2. **Logical Task Sequencing**: Ensuring earlier tasks inform later ones
3. **Search Optimization**: Providing effective search hints for each task
4. **Completeness**: Covering all aspects of the research topic
### 2. Query Generation Node
The query generation node transforms high-level research objectives into specific, targeted web search queries while ensuring proper state propagation for task tracking.
#### Strategic Query Crafting
The node employs several strategies to generate effective queries:
1. **Diversity Maximization**: Creates queries that explore different aspects of the topic
2. **Specificity Optimization**: Balances broad coverage with targeted precision
3. **Currency Awareness**: Incorporates current date information for time-sensitive topics
4. **Source Diversification**: Generates queries likely to return results from different types of sources
5. **Task Context Awareness**: Generates queries specifically aligned with current research task
#### Enhanced State Management
The node includes critical fixes for state propagation:
```python
def generate_query(state: OverallState, config: RunnableConfig) -> QueryGenerationState:
# Task-aware query generation
plan = state.get("plan")
pointer = state.get("current_task_pointer")
if plan and pointer is not None and pointer < len(plan):
research_topic = plan[pointer]["description"] # Use current task description
else:
research_topic = state.get("user_query") or get_research_topic(state["messages"])
# Generate queries for the specific task
result = structured_llm.invoke(formatted_prompt)
# FIXED: Ensure state propagation
return {
"query_list": result.query,
"plan": state.get("plan", []), # Propagate plan
"current_task_pointer": state.get("current_task_pointer", 0) # Propagate pointer
}
```
#### Prompt Engineering for Query Generation
The query generation prompt includes:
```markdown
You are a **QueryGenerator** responsible for creating sophisticated web search queries.
=== REQUIREMENTS ===
1. Each query should focus on ONE specific aspect
2. Queries should be diverse and complementary
3. Maximum {number_queries} queries allowed
4. Ensure queries target current information
5. Avoid redundant or overly similar queries
```
#### Task-to-Query Transition
A critical enhancement is the `continue_to_web_research` function that properly associates queries with tasks:
```python
def continue_to_web_research(state: QueryGenerationState):
# Get current task info from propagated state
plan = state.get("plan", [])
current_pointer = state.get("current_task_pointer", 0)
current_task_id = "unknown"
if plan and current_pointer < len(plan):
current_task_id = plan[current_pointer]["id"] # Extract actual task ID
return [
Send("web_research", {
"search_query": search_query,
"id": int(idx),
"current_task_id": current_task_id # FIXED: Proper task association
})
for idx, search_query in enumerate(state["query_list"])
]
```
#### Query Quality Assessment
Generated queries are evaluated based on:
- **Relevance**: Direct connection to the research objective
- **Specificity**: Appropriate level of detail for effective search
- **Diversity**: Coverage of different aspects or perspectives
- **Searchability**: Likelihood of returning high-quality results
- **Task Alignment**: Alignment with the specific research task
### 3. Web Research Node
The web research node represents the system's interface with external knowledge sources, utilizing Google's search API to gather comprehensive information.
#### Multi-Modal Research Execution
The research process incorporates:
1. **Native Google Search Integration**: Uses Google's GenAI client with search tools
2. **Grounding Metadata Processing**: Extracts and processes source attribution
3. **URL Resolution**: Converts search results into manageable citation formats
4. **Content Synthesis**: Combines search results into coherent findings
#### Citation and Source Management
The system implements sophisticated source tracking with real URL preservation:
```python
resolved_urls = resolve_urls(
response.candidates[0].grounding_metadata.grounding_chunks,
state["id"]
)
citations = get_citations(response, resolved_urls)
modified_text = insert_citation_markers(response.text, citations)
```
This ensures:
- **Attribution Accuracy**: Every claim is linked to its source
- **Real URL Preservation**: Original URLs are maintained for user accessibility (FIXED)
- **Citation Integration**: Sources are seamlessly embedded in the research text with verifiable links
#### Error Handling and Resilience
The web research node includes comprehensive error handling that preserves task associations even during failures:
```python
def web_research(state: WebSearchState, config: RunnableConfig) -> OverallState:
try:
# Main research execution logic
response = genai_client.models.generate_content(...)
# Process successful response
current_task_id = state.get("current_task_id", "unknown")
detailed_finding = {
"task_id": current_task_id, # Preserve task association
"query_id": state["id"],
"content": modified_text,
"source": sources_gathered[0] if sources_gathered else None,
"timestamp": datetime.now().isoformat()
}
task_specific_result = {
"task_id": current_task_id, # Preserve task association
"content": modified_text,
"sources": sources_gathered,
"timestamp": datetime.now().isoformat()
}
return {
"sources_gathered": sources_gathered,
"executed_search_queries": [state["search_query"]],
"web_research_result": [modified_text],
"current_task_detailed_findings": [detailed_finding],
"task_specific_results": [task_specific_result] # Enhanced structure
}
except Exception as e:
# ENHANCED: Error handling preserving task context
current_task_id = state.get("current_task_id", "unknown")
error_message = f"Error during web research: {str(e)}"
detailed_finding = {
"task_id": current_task_id, # Preserve task association even in errors
"query_id": state["id"],
"content": error_message,
"source": None,
"timestamp": datetime.now().isoformat()
}
task_specific_result = {
"task_id": current_task_id, # Preserve task association even in errors
"content": error_message,
"sources": [],
"timestamp": datetime.now().isoformat()
}
return {
"sources_gathered": [],
"executed_search_queries": [state["search_query"]],
"web_research_result": [error_message],
"current_task_detailed_findings": [detailed_finding],
"task_specific_results": [task_specific_result]
}
```
This enhanced error handling ensures:
- **Task Association Preservation**: Task IDs are maintained even during API failures
- **Complete State Updates**: All required state fields are populated in error scenarios
- **Graceful Degradation**: System continues operation with error information properly tracked
- **Debugging Support**: Error messages include full context for troubleshooting
#### Task-Specific Result Organization
A critical enhancement to the system is the organization of research results by task:
```python
task_specific_result = {
"task_id": current_task_id,
"content": modified_text,
"sources": sources_gathered,
"timestamp": datetime.now().isoformat()
}
```
This structure enables:
- **Task Association**: Results are clearly linked to their originating research task
- **Temporal Tracking**: Timestamps enable chronological organization of findings
- **Source Preservation**: Citation information is maintained for each result
### 4. Reflection Node
The reflection node implements a critical quality control mechanism, evaluating research completeness and identifying knowledge gaps while maintaining proper state continuity.
#### Enhanced State Management
The reflection node includes fixes for proper state propagation:
```python
def reflection(state: OverallState, config: RunnableConfig) -> ReflectionState:
# Get current task context for focused reflection
plan = state.get("plan")
pointer = state.get("current_task_pointer")
if plan and pointer is not None and pointer < len(plan):
research_topic = plan[pointer]["description"] # Use current task description
else:
research_topic = state.get("user_query") or get_research_topic(state["messages"])
# Perform reflection analysis
result = llm.with_structured_output(Reflection).invoke(formatted_prompt)
# FIXED: Ensure state propagation
return {
"is_sufficient": result.is_sufficient,
"knowledge_gap": result.knowledge_gap,
"follow_up_queries": result.follow_up_queries,
"research_loop_count": state["research_loop_count"],
"number_of_ran_queries": len(state["executed_search_queries"]),
"plan": state.get("plan", []), # Propagate plan
"current_task_pointer": state.get("current_task_pointer", 0) # Propagate pointer
}
```
#### Reflection Strategy
The reflection process involves:
1. **Completeness Assessment**: Evaluating whether current findings sufficiently address the research objective
2. **Gap Identification**: Systematically identifying areas requiring additional investigation
3. **Follow-up Generation**: Creating targeted queries to address identified gaps
4. **Research Loop Management**: Determining whether to continue or conclude research
5. **Task Context Preservation**: Maintaining awareness of the current research task
#### Structured Reflection Output
The reflection node uses structured output to ensure consistent evaluation:
```json
{
"is_sufficient": boolean,
"knowledge_gap": "Specific description of missing information",
"follow_up_queries": ["targeted query 1", "targeted query 2"]
}
```
#### Enhanced Follow-up Query Generation
The `evaluate_research` function ensures proper task association for follow-up queries:
```python
def evaluate_research(state: ReflectionState, config: RunnableConfig) -> OverallState:
if state["is_sufficient"] or state["research_loop_count"] >= max_research_loops:
return "record_task_completion"
else:
# FIXED: Get current task info from propagated state
plan = state.get("plan", [])
current_pointer = state.get("current_task_pointer", 0)
current_task_id = "unknown"
if plan and current_pointer < len(plan):
current_task_id = plan[current_pointer]["id"]
return [
Send("web_research", {
"search_query": follow_up_query,
"id": state["number_of_ran_queries"] + int(idx),
"current_task_id": current_task_id # Proper task association
})
for idx, follow_up_query in enumerate(state["follow_up_queries"])
]
```
#### Quality Control Mechanisms
The reflection system implements several quality controls:
1. **Loop Limiting**: Maximum research iterations prevent infinite loops
2. **Query Diversification**: Follow-up queries explore new information dimensions
3. **Context Preservation**: Reflection considers all previous research findings
4. **Objective Alignment**: Ensures new queries remain aligned with original research goals
5. **Task Continuity**: Maintains task association throughout the reflection cycle
### 5. Content Enhancement Node
The content enhancement node represents a critical innovation in the research pipeline, implementing intelligent decision-making for targeted deep content scraping using Firecrawl integration.
#### Smart Enhancement Decision Making
The content enhancement process employs sophisticated analysis to determine when additional depth is needed:
```python
def content_enhancement_analysis(state: OverallState, config: RunnableConfig) -> dict:
# Analyze current research context
plan = state.get("plan", [])
current_pointer = state.get("current_task_pointer", 0)
# Determine research topic
if plan and current_pointer < len(plan):
research_topic = plan[current_pointer]["description"]
else:
research_topic = state.get("user_query") or get_research_topic(state["messages"])
# Get current findings and sources
current_findings = state.get("web_research_result", [])
grounding_sources = extract_grounding_sources(state)
# Use intelligent decision maker
decision = get_content_enhancement_decision_maker().analyze_enhancement_need(
research_topic=research_topic,
current_findings=current_findings,
grounding_sources=grounding_sources,
config=config
)
```
#### Enhancement Decision Criteria
The system evaluates several factors to determine enhancement needs:
1. **Content Depth Analysis**: Assessing whether current findings provide sufficient detail
2. **Source Quality Evaluation**: Determining if higher-quality sources are available
3. **Information Gap Detection**: Identifying specific areas requiring deeper investigation
4. **Resource Efficiency**: Balancing enhancement benefits against computational costs
#### Firecrawl Integration Strategy
When enhancement is deemed necessary, the system:
1. **Prioritizes URLs**: Selects the most promising sources for deep scraping
2. **Executes Targeted Scraping**: Uses Firecrawl for comprehensive content extraction
3. **Quality Assessment**: Evaluates the effectiveness of enhanced content
4. **Content Integration**: Merges enhanced findings with existing research
#### Enhancement Types
The system supports multiple enhancement categories:
- **Technical Deep Dives**: Detailed technical specifications and implementation details
- **Market Intelligence**: Current market data and competitive analysis
- **Case Studies**: Real-world implementation examples and outcomes
- **Regulatory Information**: Policies, standards, and compliance requirements
### 6. Enhanced Research Evaluation Node
The enhanced research evaluation node represents an evolution of the traditional reflection process, incorporating content enhancement results into research sufficiency assessment.
#### Intelligent Evaluation Integration
The evaluation process considers multiple factors beyond traditional content analysis:
```python
def evaluate_research_enhanced(state: OverallState, config: RunnableConfig) -> dict:
# Get reflection results
reflection_is_sufficient = state.get("reflection_is_sufficient", False)
# Check enhancement status and effectiveness
enhancement_status = state.get("enhancement_status")
enhanced_sources_count = state.get("enhanced_sources_count", 0)
# Intelligent decision combining reflection and enhancement results
is_sufficient = reflection_is_sufficient
# Enhancement boost evaluation
if not is_sufficient and enhancement_status == "completed" and enhanced_sources_count > 0:
enhancement_boost = min(enhanced_sources_count * 0.3, 0.8)
if enhancement_boost >= 0.6:
is_sufficient = True
```
#### Multi-Dimensional Assessment
The enhanced evaluation considers:
1. **Traditional Reflection Results**: LLM-based completeness assessment
2. **Enhancement Effectiveness**: Success and impact of content enhancement
3. **Source Diversity**: Breadth of information sources utilized
4. **Content Depth**: Comprehensive coverage of research objectives
5. **Quality Metrics**: Overall research quality indicators
#### Dynamic Decision Making
The evaluation node implements sophisticated routing logic:
- **Continue Research**: When significant gaps remain despite enhancement
- **Complete Task**: When research objectives are sufficiently addressed
- **Adaptive Thresholds**: Dynamic adjustment based on enhancement success
### 7. Task Completion Node
The task completion node manages the transition between individual research tasks and maintains a comprehensive ledger of findings with enhanced data association capabilities.
#### Enhanced Task State Management
The node includes significant improvements for proper finding association:
```python
def record_task_completion_node(state: OverallState, config: RunnableConfig) -> dict:
# Get current task info
plan = state.get("plan", [])
current_pointer = state.get("current_task_pointer", 0)
current_task = plan[current_pointer]
current_task_id = current_task.get("id")
# IMPROVED: Enhanced task-specific finding extraction
detailed_findings = state.get("current_task_detailed_findings", [])
task_specific_findings = [
finding["content"] for finding in detailed_findings
if finding.get("task_id") == current_task_id
]
# FALLBACK: If no task-specific findings found, use recent web results
if not task_specific_findings:
print(f"Warning: No task-specific findings found for task {current_task_id}, using recent web results as fallback")
web_results = state.get("web_research_result", [])
task_specific_findings = web_results[-3:] if len(web_results) > 3 else web_results
# Generate comprehensive task summary
task_summary = _summarize_task_findings(
current_task["description"],
task_specific_findings,
config
)
# ENHANCED: Create citations from detailed findings
citations_for_snippets = []
for finding in detailed_findings:
if finding.get("task_id") == current_task_id and finding.get("source"):
citations_for_snippets.append({
"snippet": finding["content"],
"source": str(finding["source"])
})
# Create comprehensive ledger entry
ledger_entry = {
"task_id": current_task_id,
"description": current_task["description"],
"findings_summary": task_summary,
"detailed_snippets": task_specific_findings, # Now properly populated
"citations_for_snippets": citations_for_snippets
}
# Update plan status and prepare for next task
plan[current_pointer]["status"] = "completed"
return {
"ledger": [ledger_entry],
"global_summary_memory": [task_summary],
"plan": plan,
"current_task_pointer": current_pointer + 1,
"current_task_detailed_findings": [], # Clear for next task
"next_node_decision": "continue" if current_pointer + 1 < len(plan) else "end"
}
```
#### Task State Management
This enhanced node handles:
1. **Finding Summarization**: Condensing detailed research into key insights
2. **Ledger Entry Creation**: Structured storage of task-specific findings with proper association
3. **Progress Tracking**: Updating task completion status
4. **Context Preparation**: Preparing state for subsequent tasks
5. **Fallback Mechanisms**: Handling cases where task-specific data is missing
#### Improved Summarization Strategy
The task completion process employs intelligent summarization with fallback handling:
```python
def _summarize_task_findings(task_description: str, web_results: List[str], config: RunnableConfig) -> str:
if not web_results:
return f"No specific findings available for task: {task_description}"
# Use recent results to avoid overwhelming context
recent_results = web_results[-3:] if len(web_results) > 3 else web_results
context_to_summarize = "\n---\n".join(recent_results)
prompt = f"""Given the research task: "{task_description}"
And the following research findings:
{context_to_summarize}
Please provide a concise summary (1-2 sentences) of the key findings that directly address this specific task."""
try:
response = llm.invoke(prompt)
return response.content if hasattr(response, 'content') else str(response)
except Exception as e:
print(f"Task summarization failed: {e}")
return f"Completed research for: {task_description}"
```
This ensures:
- **Task Alignment**: Summaries focus on the specific research objective
- **Conciseness**: Key findings are distilled into manageable insights
- **Context Preservation**: Important details are retained for final report generation
- **Error Recovery**: Graceful handling of summarization failures
#### Enhanced Ledger Structure
The ledger now maintains comprehensive records with proper task association:
```jsonc
{
"task_id": "unique_identifier",
"description": "research_objective",
"findings_summary": "key_insights",
"detailed_snippets": ["detailed_finding_1", "detailed_finding_2"], // Now properly populated
"citations_for_snippets": [{"snippet": "content", "source": "url"}] // Enhanced with proper associations
}
```
#### Data Quality Improvements
Recent enhancements ensure:
- **Complete Finding Association**: Task-specific results are properly linked to ledger entries
- **Fallback Data Recovery**: Missing task-specific data is recovered from general web results
- **Citation Preservation**: Source information is maintained through task completion
- **State Continuity**: Task context is properly managed across task transitions
### 8. Report-Level Enhancer
The report-level enhancer performs targeted deep enhancement for identified information gaps during final report synthesis.
#### Enhanced State Management
The enhancer includes fixes for proper state propagation:
```python
def enhance_report(state: OverallState, config: RunnableConfig) -> OverallState:
# Get current task context for focused enhancement
plan = state.get("plan")
pointer = state.get("current_task_pointer")
if plan and pointer is not None and pointer < len(plan):
research_topic = plan[pointer]["description"] # Use current task description
else:
research_topic = state.get("user_query") or get_research_topic(state["messages"])
# Perform report enhancement
result = llm.with_structured_output(ReportEnhancer).invoke(formatted_prompt)
# FIXED: Ensure state propagation
return {
"plan": state.get("plan", []), # Propagate plan
"current_task_pointer": state.get("current_task_pointer", 0), # Propagate pointer
"final_report_markdown": result.enhanced_report
}
```
#### Enhanced Report Generation
The enhancer generates a comprehensive, well-structured report:
```python
def generate_report(state: OverallState, config: RunnableConfig) -> str:
# Get current task context for focused report generation
plan = state.get("plan")
pointer = state.get("current_task_pointer")
if plan and pointer is not None and pointer < len(plan):
research_topic = plan[pointer]["description"] # Use current task description
else:
research_topic = state.get("user_query") or get_research_topic(state["messages"])
# Generate report content
report_content = f"""
{research_topic}
{_summarize_task_findings(research_topic, state["web_research_result"], config)}
"""
# Append task-specific findings
for task in plan:
if task["status"] == "completed":
report_content += f"""
{task["description"]}
{_summarize_task_findings(task["description"], state["task_specific_results"], config)}
"""
return report_content
```
This ensures:
- **Comprehensive Coverage**: All research findings are utilized in the final report
- **Logical Structure**: Information flows logically from general to specific
- **Context Preservation**: Task-specific findings are integrated seamlessly
### 9. Document Synthesizer
The document synthesis node represents the culmination of the research process, transforming accumulated findings into comprehensive, well-structured reports.
#### Architectural Design for Comprehensive Output
The synthesis process addresses a fundamental challenge in AI-generated reports: ensuring that extensive research findings are fully utilized rather than simply summarized. The system implements several strategies:
1. **Batch Processing**: Large volumes of research content are processed in manageable chunks
2. **Context Maximization**: Each batch utilizes the full available context window (up to 100,000 tokens)
3. **Continuity Preservation**: Batch transitions maintain narrative flow and coherence
4. **Source Integration**: Citations and references are preserved throughout the synthesis
#### Multi-Stage Report Generation
The report generation follows a structured approach:
##### Stage 1: Introduction Generation
```python
intro_prompt = f"""Based on the research plan and findings, write a comprehensive introduction.
Research Topic: {user_query}
Research Plan: {[task['description'] for task in plan]}
The introduction should:
1. Clearly state the research topic and its importance
2. Outline the main areas of investigation
3. Preview the key findings
4. Be written in a formal, academic style
"""
```
##### Stage 2: Section-by-Section Processing
For each research task, the system:
1. Retrieves task-specific results
2. Implements fallback mechanisms for missing data
3. Processes content in batches if volume exceeds token limits
4. Maintains continuity across batch boundaries
##### Stage 3: Conclusion Synthesis
The conclusion integrates findings across all research tasks:
```python
conclusion_prompt = f"""Based on the research findings, write a comprehensive conclusion.
Key Findings by Section: {findings_summary}
The conclusion should:
1. Summarize the main findings
2. Discuss implications and significance
3. Identify areas for future research
4. Be written in a formal, academic style
"""
```
## Data Flow and Transformations
### Information Architecture
The system's information architecture is designed to support progressive refinement and synthesis of research findings.
#### Raw Data Collection Phase
Initial data collection involves:
1. **Query Execution**: Web searches return unstructured text results
2. **Source Attribution**: Each result is tagged with source information
3. **Content Formatting**: Results are processed for downstream consumption
#### Intermediate Processing Phase
Research findings undergo several transformations:
1. **Task Association**: Results are linked to specific research objectives
2. **Quality Filtering**: Low-quality or irrelevant content is identified
3. **Citation Processing**: Source references are standardized and embedded
#### Synthesis Preparation Phase
Before final report generation:
1. **Content Aggregation**: Related findings are grouped by research task
2. **Narrative Planning**: The overall report structure is determined
3. **Context Optimization**: Content is organized to maximize LLM processing efficiency
#### Final Output Generation Phase
The culminating phase produces:
1. **Structured Reports**: Well-organized, comprehensive documents
2. **Integrated Citations**: Proper source attribution throughout
3. **Coherent Narrative**: Logical flow from introduction through conclusion
### Token Management and Context Optimization
#### Batch Processing Strategy
The system implements sophisticated token management:
```python
def split_by_tokens(texts, max_tokens=100000, encoding_name="cl100k_base"):
enc = tiktoken.get_encoding(encoding_name)
batches = []
current_batch = []
current_tokens = 0
for text in texts:
tokens = len(enc.encode(text))
if current_tokens + tokens > max_tokens and current_batch:
batches.append(current_batch)
current_batch = [text]
current_tokens = tokens
else:
current_batch.append(text)
current_tokens += tokens
if current_batch:
batches.append(current_batch)
return batches
```
This approach:
- **Maximizes Context Utilization**: Each batch approaches the LLM's context limit
- **Preserves Content Integrity**: Text units are not arbitrarily truncated
- **Enables Progressive Generation**: Each batch builds on previous outputs
#### Context Continuity Mechanisms
To maintain coherence across batch boundaries:
1. **Previous Content Integration**: Each batch after the first includes the content previously generated for the same section
2. **Transition Management**: Explicit instructions for maintaining narrative flow
3. **Redundancy Prevention**: Mechanisms to avoid content duplication
4. **Conclusion Coordination**: Final batches include section summarization
## Prompt Engineering and LLM Integration
### Prompt Design Philosophy
The system's prompt engineering follows several key principles:
#### Role-Based Agent Design
Each node operates with a clearly defined role:
- **PlannerAgent**: Strategic research planning
- **QueryGenerator**: Search query optimization
- **WebResearcher**: Information gathering and synthesis
- **ResearchAnalyst**: Quality evaluation and gap identification
- **ResearchReportWriter**: Document synthesis and presentation
#### Structured Output Requirements
All prompts specify exact output formats:
```markdown
=== OUTPUT FORMAT ===
Return a JSON object with these exact keys:
{
"field_name": "description",
"array_field": ["item1", "item2"]
}
```
This ensures:
- **Consistent Data Exchange**: Reliable interfaces between nodes
- **Error Reduction**: Minimizes parsing failures and malformed outputs
- **Automated Processing**: Enables seamless pipeline execution
#### Context-Aware Instruction Design
Prompts adapt based on execution context:
- **Task-Specific Instructions**: Different guidance for different research phases
- **Dynamic Examples**: Relevant examples based on current research domain
- **Conditional Logic**: Instructions that vary based on state conditions
### LLM Model Selection and Configuration
#### Model Specialization
Different models are used for different tasks:
- **Planning**: Models optimized for reasoning and decomposition
- **Research**: Models with strong information synthesis capabilities
- **Reflection**: Models configured for analytical evaluation
- **Writing**: Models tuned for coherent document generation
#### Temperature and Creativity Management
Temperature settings are carefully calibrated:
```python
# Planning requires creative problem decomposition
llm = ChatGoogleGenerativeAI(model=model, temperature=0.7)
# Research synthesis benefits from focused, factual output
llm = ChatGoogleGenerativeAI(model=model, temperature=0.3)
# Document writing balances creativity with accuracy
llm = ChatGoogleGenerativeAI(model=model, temperature=0.7)
```
#### Retry and Error Handling
LLM clients are created with a bounded retry count so that transient API failures are retried automatically:
```python
llm = ChatGoogleGenerativeAI(
model=model,
temperature=temperature,
max_retries=2,
api_key=os.getenv("GEMINI_API_KEY"),
)
```
## Error Handling and Resilience
### Multi-Layer Error Management
The system implements error handling at multiple levels:
#### API-Level Error Handling
```python
try:
response = genai_client.models.generate_content(...)
except Exception as e:
return {
"web_research_result": [f"Error during research: {str(e)}"],
"sources_gathered": []
}
```
#### Node-Level Error Recovery
Each node includes fallback mechanisms:
- **Default Outputs**: Reasonable defaults when processing fails
- **State Preservation**: Critical state information is maintained despite errors
- **Graceful Degradation**: System continues operation with reduced functionality
#### System-Level Resilience
The overall system design includes:
- **Isolation**: Failures in one node don't cascade to others
- **Recovery**: Ability to resume processing after transient failures
- **Monitoring**: Comprehensive logging for debugging and optimization
### Data Validation and Integrity
#### Input Validation
Each node validates its inputs:
```python
if not plan or not ledger:
return {
"messages": [AIMessage(content="Error: No research plan available")],
"final_report_markdown": "No research findings available."
}
```
#### Output Validation
Generated content undergoes quality checks:
- **Format Validation**: Ensuring outputs match expected schemas
- **Content Validation**: Basic sanity checks on generated content
- **Completeness Validation**: Verifying all required fields are present
## Batch Generation Mechanism
### Technical Implementation
The batch generation system represents a sophisticated solution to the challenge of processing large volumes of research content within LLM context limits.
#### Token-Based Partitioning
The system uses tiktoken for accurate token counting:
```python
enc = tiktoken.get_encoding("cl100k_base")
tokens = len(enc.encode(text))
```
This ensures:
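- **Accurate Measurement**: Token counts are computed with tiktoken's `cl100k_base` encoding; because this is an OpenAI encoding, the counts closely approximate, but do not exactly match, the tokenization of the Gemini models used for generation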
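- **Optimal Utilization**: Batches are filled up to the token limit without exceeding it; the one exception is a single text that is itself larger than the limit, which is placed in a batch of its own rather than truncated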
- **Content Preservation**: Natural text boundaries are respected
#### Batch Continuity Strategy
Each batch after the first includes context from previous batches:
```python
if previous_content:
section_prompt += f"""
Previously generated content for this section:
{previous_content}
Continue from the above, ensuring logical flow and no repetition.
"""
```
This approach:
- **Maintains Coherence**: Each batch builds naturally on previous content
- **Prevents Redundancy**: Explicit instructions prevent content duplication
- **Ensures Completeness**: All source material is addressed across batches
#### Final Integration Process
The last batch in each section includes special instructions:
```python
if is_last:
section_prompt += "At the end of this batch, write a summary paragraph for the section."
else:
section_prompt += "Do not write a conclusion; just continue the section."
```
### Content Quality Optimization
#### Research Content Utilization
The batch system ensures comprehensive use of research findings:
1. **Complete Coverage**: Every research finding is processed and incorporated
2. **Detailed Expansion**: Each finding receives detailed analysis and explanation
3. **Source Attribution**: Citations are preserved and properly formatted
4. **Contextual Integration**: Findings are woven into coherent narrative sections
#### Narrative Coherence
Despite batch processing, the system maintains narrative quality through:
1. **Consistent Voice**: All batches use the same writing style and tone
2. **Logical Flow**: Information is presented in logical sequence
3. **Transition Management**: Smooth transitions between batch-generated content
4. **Section Unity**: Individual sections read as coherent wholes despite batch origins
## Content Quality Assurance
### Multi-Dimensional Quality Control
The system implements quality assurance across several dimensions:
#### Factual Accuracy
- **Source Verification**: All claims are traceable to specific sources
- **Citation Requirements**: Mandatory attribution for all factual statements
- **Cross-Reference Validation**: Consistency checking across different sources
#### Structural Quality
- **Logical Organization**: Information flows logically from general to specific
- **Section Balance**: Appropriate content distribution across report sections
- **Hierarchical Clarity**: Clear headings and subheadings organize content
#### Linguistic Quality
- **Formal Academic Style**: Consistent professional writing throughout
- **Technical Precision**: Accurate use of domain-specific terminology
- **Readability Optimization**: Clear, accessible presentation of complex information
### Quality Metrics and Evaluation
#### Quantitative Measures
- **Content Volume**: Ensuring sufficient detail in generated reports
- **Source Diversity**: Measuring breadth of information sources
- **Citation Density**: Appropriate level of source attribution
#### Qualitative Assessment
- **Coherence**: Logical flow and narrative consistency
- **Completeness**: Comprehensive coverage of research objectives
- **Relevance**: Direct connection between findings and research questions
## Performance Optimization
### Computational Efficiency
#### Parallel Processing
The system leverages LangGraph's parallel execution capabilities:
```python
return [
Send("web_research", {"search_query": query, "id": idx})
for idx, query in enumerate(state["query_list"])
]
```
This enables:
- **Concurrent Research**: Multiple search queries execute simultaneously
- **Reduced Latency**: Overall processing time is minimized
- **Resource Optimization**: Maximum utilization of available computing resources
#### Caching and State Management
- **State Persistence**: Intermediate results are preserved across node transitions
- **Incremental Processing**: Each node builds on previous work without redundant computation
- **Memory Optimization**: Efficient state structure minimizes memory usage
### API Efficiency
#### Request Optimization
- **Batch API Calls**: Multiple operations combined when possible
- **Retry Logic**: Intelligent retry mechanisms for transient failures
- **Rate Limiting**: Respectful API usage within provider limits
#### Context Window Utilization
- **Maximum Context Usage**: Each LLM call uses available context efficiently
- **Batch Size Optimization**: Batches sized to maximize context utilization
- **Content Prioritization**: Most important content processed first
## System Fixes and Improvements
### Critical Issues Addressed
This section documents significant improvements made to the system to address fundamental data flow and state management issues that were impacting report quality and system reliability.
#### Problem 1: Task ID Propagation Failure
**Issue Description**: Analysis of production logs revealed that most research results were being tagged with `"task_id": "unknown"`, preventing proper association between research findings and their corresponding tasks.
**Root Cause**: State definitions for intermediate nodes (`QueryGenerationState`, `ReflectionState`) were missing critical fields (`plan`, `current_task_pointer`) needed for task context propagation.
**Solution Implemented**:
```python
# Before (missing fields)
class QueryGenerationState(TypedDict):
query_list: list[Query]
# After (complete state propagation)
class QueryGenerationState(TypedDict):
query_list: list[Query]
plan: list
current_task_pointer: int
```
**Impact**: Ensures that every research query is properly associated with its originating task, enabling accurate content organization in final reports.
#### Problem 2: Incomplete Ledger Entries
**Issue Description**: Generated reports lacked detail because ledger entries had empty `detailed_snippets` arrays, despite successful research execution.
**Root Cause**: The `record_task_completion_node` function was unable to retrieve task-specific findings due to improper task ID filtering.
**Solution Implemented**:
```python
def record_task_completion_node(state: OverallState, config: RunnableConfig) -> dict:
# Improved task-specific finding extraction
current_task_id = current_task.get("id")
detailed_findings = state.get("current_task_detailed_findings", [])
task_specific_findings = [
finding["content"] for finding in detailed_findings
if finding.get("task_id") == current_task_id
]
# Fallback mechanism for missing data
if not task_specific_findings:
print(f"Warning: No task-specific findings found for task {current_task_id}, using recent web results as fallback")
web_results = state.get("web_research_result", [])
task_specific_findings = web_results[-3:] if len(web_results) > 3 else web_results
```
**Impact**: Ledger entries now contain comprehensive detailed snippets, enabling rich content synthesis in final reports.
#### Problem 3: Missing Task-Specific Results Structure
**Issue Description**: The system lacked a proper data structure for organizing research results by task, leading to content mixing and poor organization.
**Solution Implemented**:
```python
# Added to OverallState
task_specific_results: Annotated[List[Dict[str, Any]], operator.add]
# Enhanced web_research node to populate this structure
task_specific_result = {
"task_id": current_task_id,
"content": modified_text,
"sources": sources_gathered,
"timestamp": datetime.now().isoformat()
}
```
**Impact**: Enables precise content organization and batch processing for comprehensive report generation.
#### Problem 4: Inadequate Error Handling
**Issue Description**: API failures (quota exhaustion, network errors) resulted in loss of task context and incomplete state updates.
**Solution Implemented**:
```python
def web_research(state: WebSearchState, config: RunnableConfig) -> OverallState:
try:
# Main research execution logic
response = genai_client.models.generate_content(...)
# Process successful response
current_task_id = state.get("current_task_id", "unknown")
detailed_finding = {
"task_id": current_task_id, # Preserve task association
"query_id": state["id"],
"content": modified_text,
"source": sources_gathered[0] if sources_gathered else None,
"timestamp": datetime.now().isoformat()
}
task_specific_result = {
"task_id": current_task_id, # Preserve task association
"content": modified_text,
"sources": sources_gathered,
"timestamp": datetime.now().isoformat()
}
return {
"sources_gathered": sources_gathered,
"executed_search_queries": [state["search_query"]],
"web_research_result": [modified_text],
"current_task_detailed_findings": [detailed_finding],
"task_specific_results": [task_specific_result] # Enhanced structure
}
except Exception as e:
# ENHANCED: Error handling preserving task context
current_task_id = state.get("current_task_id", "unknown")
error_message = f"Error during web research: {str(e)}"
detailed_finding = {
"task_id": current_task_id, # Preserve task association even in errors
"query_id": state["id"],
"content": error_message,
"source": None,
"timestamp": datetime.now().isoformat()
}
task_specific_result = {
"task_id": current_task_id, # Preserve task association even in errors
"content": error_message,
"sources": [],
"timestamp": datetime.now().isoformat()
}
return {
"sources_gathered": [],
"executed_search_queries": [state["search_query"]],
"web_research_result": [error_message],
"current_task_detailed_findings": [detailed_finding],
"task_specific_results": [task_specific_result]
}
```
**Impact**: System maintains data integrity and task associations even during API failures, ensuring robust operation.
### Validation and Testing
To ensure the effectiveness of these fixes, a comprehensive testing framework was implemented:
#### Test Suite Components
1. **State Definition Validation**: Verifies all required fields are present in state type definitions
2. **Task ID Propagation Testing**: Confirms proper task association through the entire pipeline
3. **Error Handling Verification**: Validates system behavior under various failure conditions
#### Test Results
All tests pass successfully, confirming:
- ✅ State definitions include all necessary fields
- ✅ Task IDs propagate correctly through the pipeline
- ✅ Error conditions preserve task associations
- ✅ Fallback mechanisms function as intended
### Performance Impact
The fixes provide significant improvements in system reliability and output quality:
1. **Data Integrity**: Eliminates "unknown" task IDs under normal operation
2. **Content Richness**: Ledger entries now contain comprehensive detailed findings
3. **Report Quality**: Final reports utilize full research context through proper task organization
4. **System Resilience**: Graceful degradation during API failures with preserved task context
### Recent Critical Fixes (Latest Updates)
#### Citation URL Management Fix (December 2024)
**Problem**: All citations in generated reports pointed to inaccessible `vertexaisearch.cloud.google.com` internal URLs instead of real source URLs.
**Solution**: Modified `resolve_urls` function in `utils.py` to preserve original URLs:
```python
# Fixed to preserve real URLs instead of creating fake ones
resolved_map[url] = url # Keep original URL
```
**Impact**: Citations now provide real, accessible URLs for source verification.
#### Report-Level Enhancement System (December 2024)
**Addition**: Implemented dual-layer content enhancement system with:
- Pre-analysis for information gap identification
- Targeted Firecrawl integration for specific missing data
- Quality assessment and smart enhancement decisions
**Impact**: Comprehensive reports with targeted deep enhancement for identified gaps.
### Monitoring and Maintenance
Ongoing monitoring includes:
- Task ID propagation verification in production logs
- Ledger entry completeness tracking
- Error rate monitoring with task context preservation
- Report quality assessment through content metrics
- Citation URL accessibility validation (NEW)
- Report-level enhancement effectiveness tracking (NEW)
## Report-Level Content Enhancement
The report-level content enhancement system represents a significant innovation that addresses information gaps identified during the final report synthesis phase. This dual-layer enhancement approach ensures comprehensive coverage of research topics.
### Architectural Overview
The report-level enhancement system operates as a pre-analysis step in the `finalize_answer` node, where the LLM can identify specific information gaps before generating the final report. This targeted approach differs from task-level enhancement by focusing on cross-task information needs and synthesis requirements.
#### Key Components
1. **ReportLevelEnhancer**: Main enhancement coordination class
2. **Enhancement Request Analysis**: LLM-powered gap identification
3. **Targeted Firecrawl Integration**: Selective deep web scraping
4. **Quality Assessment**: Enhancement effectiveness evaluation
### Implementation Strategy
The enhancement process follows a structured approach:
```python
def integrate_report_enhancement_into_finalize(
user_query: str,
research_plan: List[Dict],
aggregated_research_data: str,
available_sources: List[Dict[str, Any]],
config: RunnableConfig
) -> Tuple[str, List[ReportEnhancementResult]]:
enhancer = ReportLevelEnhancer()
# 1. Analyze enhancement needs
enhancement_requests = enhancer.analyze_report_enhancement_needs(
user_query, research_plan, aggregated_research_data, config
)
if not enhancement_requests:
print("✅ Report-level analysis: Current information is sufficient")
return aggregated_research_data, []
# 2. Execute targeted enhancement
enhancement_results = enhancer.execute_targeted_enhancement(
enhancement_requests, available_sources
)
# 3. Merge enhanced content
enhanced_data = aggregated_research_data
successful_enhancements = [r for r in enhancement_results if r.success]
if successful_enhancements:
for result in successful_enhancements:
enhanced_data += f"\n\n## Report-Level Deep Enhancement\n{result.enhanced_content}"
return enhanced_data, enhancement_results
```
### Enhancement Types and Targeting
The system identifies several categories of enhancement needs:
1. **Specific Data & Statistics**: Quantitative data gaps
2. **Implementation Cases & Technical Details**: Concrete examples and technical specifications
3. **Market Data & Competitive Analysis**: Current market information
4. **Policies, Regulations & Standards**: Regulatory framework coverage
### Quality Assurance
Enhancement results undergo quality assessment:
```python
def _assess_enhancement_quality(content: str, request: ReportEnhancementRequest) -> str:
length = len(content)
target_keywords = request.target_information.lower().split()
keyword_matches = sum(1 for keyword in target_keywords if keyword in content.lower())
keyword_ratio = keyword_matches / len(target_keywords) if target_keywords else 0
if length > 2000 and keyword_ratio > 0.6:
return "excellent"
elif length > 1000 and keyword_ratio > 0.4:
return "good"
elif length > 500 and keyword_ratio > 0.2:
return "fair"
else:
return "poor"
```
## Citation System and URL Management
### Critical URL Management Fix
A major system improvement addressed a fundamental flaw in the citation URL system that was generating inaccessible references for users.
#### Problem Identification
Analysis of production results revealed that citations contained URLs in the format:
```
[source](https://vertexaisearch.cloud.google.com/id/x-x)
```
These URLs were Google Vertex AI Search internal references, not accessible to end users, rendering the entire citation system ineffective for source verification.
#### Root Cause Analysis
The issue originated in the `resolve_urls` function in `utils.py`:
```python
# PROBLEMATIC ORIGINAL IMPLEMENTATION
def resolve_urls(urls_to_resolve: List[Any], id: int) -> Dict[str, str]:
prefix = f"https://vertexaisearch.cloud.google.com/id/"
urls = [site.web.uri for site in urls_to_resolve]
for idx, url in enumerate(urls):
if url not in resolved_map:
resolved_map[url] = f"{prefix}{id}-{idx}" # Creates fake URLs!
```
This function incorrectly transformed real, accessible URLs into fake internal references.
#### Solution Implementation
The fix preserves original URLs while maintaining the deduplication functionality:
```python
# FIXED IMPLEMENTATION
def resolve_urls(urls_to_resolve: List[Any], id: int) -> Dict[str, str]:
"""
Create a map that preserves the original URLs instead of replacing them with fake internal IDs.
This ensures citations point to real, accessible web sources.
"""
urls = [site.web.uri for site in urls_to_resolve]
resolved_map = {}
for idx, url in enumerate(urls):
if url not in resolved_map:
resolved_map[url] = url # Keep the original URL!
return resolved_map
```
#### Impact Assessment
The fix provides several critical improvements:
1. **User Accessibility**: Citations now point to real, clickable URLs
2. **Source Verification**: Users can verify information by accessing original sources
3. **Professional Standards**: Reports meet academic and professional citation requirements
4. **System Integrity**: No loss of source tracking functionality
#### Enhanced Citation Processing
The citation system now properly handles URL preservation through the entire pipeline:
```python
def convert_citations_to_readable(content, source_mapping):
def replace_citation(match):
citation_id = match.group(1)
if citation_id in source_mapping:
source_info = source_mapping[citation_id]
domain = source_info.get('domain', 'Unknown Source')
url = source_info.get('value', '')
label = source_info.get('label', domain)
# Format with real, accessible URLs
if url and url.startswith('http') and 'vertexaisearch.cloud.google.com' not in url:
return f"[Source: {label} ({url})]"
else:
return f"[Source: {label}]"
return f"[Source: {citation_id}]"
# Convert citations while preserving real URLs
content = re.sub(r'\[vertexaisearch\.cloud\.google\.com/id/([^\]]+)\]',
replace_citation, content)
return content
```
### Citation Quality Validation
The system now includes validation to ensure citation quality:
1. **URL Accessibility**: Verification that URLs are not internal system references
2. **Source Attribution**: Proper linking of content to original sources
3. **Format Consistency**: Standardized citation format across reports
4. **Completeness**: All factual claims include appropriate source attribution
## Future Enhancements
### Planned Improvements
#### Advanced Citation Management
- **Academic Format Support**: APA, MLA, and other citation styles
- **Source Quality Assessment**: Automatic evaluation of source credibility
- **Reference Deduplication**: Intelligent handling of duplicate sources
#### Content Enhancement Features
- **Visual Content Integration**: Charts, graphs, and diagrams in reports
- **Multimedia Support**: Integration of video and audio sources
- **Interactive Elements**: Expandable sections and dynamic content
#### Quality Assurance Enhancements
- **Automated Fact-Checking**: Cross-reference verification against reliable sources
- **Bias Detection**: Identification and mitigation of content bias
- **Completeness Scoring**: Quantitative assessment of research thoroughness
### Scalability Considerations
#### System Architecture
- **Microservice Decomposition**: Breaking system into independently scalable components
- **Database Integration**: Persistent storage for large-scale research projects
- **Load Balancing**: Distribution of processing across multiple instances
#### Performance Optimization
- **Caching Layers**: Multiple levels of caching for improved response times
- **Asynchronous Processing**: Non-blocking execution for better resource utilization
- **Stream Processing**: Real-time result streaming for large documents
### Integration Possibilities
#### External System Integration
- **Academic Databases**: Direct integration with scholarly research platforms
- **Enterprise Systems**: Connection to organizational knowledge bases
- **Collaborative Platforms**: Multi-user research and editing capabilities
#### API and Developer Experience
- **RESTful API**: Standardized interface for external integrations
- **SDK Development**: Language-specific libraries for easy integration
- **Webhook Support**: Event-driven integration with external systems
## Conclusion
The LangGraph-based research agent represents a significant advancement in automated research and document generation. By orchestrating multiple specialized AI agents, implementing sophisticated state management, and utilizing advanced prompt engineering techniques, the system transforms simple user queries into comprehensive, well-researched documents.
The key innovations include:
1. **Multi-Agent Orchestration**: Specialized agents handle different aspects of the research pipeline
2. **Iterative Research Process**: Reflection and refinement cycles ensure comprehensive coverage
3. **Batch Generation Mechanism**: Efficient utilization of large LLM context windows
4. **State Management**: Sophisticated tracking of research progress and findings with proper task association
5. **Quality Assurance**: Multiple layers of validation and error handling
6. **System Resilience**: Robust error handling that preserves data integrity under failure conditions
### Recent Improvements
The system has undergone significant improvements to address critical issues in task tracking and content organization:
- **Enhanced State Management**: Complete task context propagation through all pipeline stages
- **Improved Data Organization**: Task-specific result tracking for better content synthesis
- **Robust Error Handling**: Graceful degradation with preserved task associations
- **Comprehensive Testing**: Validation framework ensuring system reliability
The system's design prioritizes both quality and scalability, making it suitable for a wide range of research applications from academic work to business intelligence. The modular architecture enables continuous improvement and customization for specific use cases.
As AI capabilities continue to advance, systems like this will become increasingly important for augmenting human research capabilities and democratizing access to comprehensive, high-quality research outputs. The foundation established here, with its robust state management and task tracking capabilities, provides a solid platform for future enhancements and specialized applications.
The comprehensive documentation provided here serves as both a technical reference and a design guide for similar systems. By understanding the principles and implementation details outlined in this document, developers can build upon this foundation to create even more sophisticated research and document generation systems.
Through careful attention to prompt engineering, state management, error handling, and quality assurance, this system demonstrates how modern AI technologies can be orchestrated to produce outputs that rival human-generated research reports in comprehensiveness and quality. The future of automated research lies in systems that combine the reasoning capabilities of large language models with the systematic approach and quality controls demonstrated in this implementation.
================================================
FILE: frontend/.gitignore
================================================
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
lerna-debug.log*
node_modules
dist
dist-ssr
*.local
# Editor directories and files
.vscode/*
!.vscode/extensions.json
.idea
.DS_Store
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?
================================================
FILE: frontend/components.json
================================================
{
  "$schema": "https://ui.shadcn.com/schema.json",
  "style": "new-york",
  "rsc": false,
  "tsx": true,
  "tailwind": {
    "config": "",
    "css": "src/global.css",
    "baseColor": "neutral",
    "cssVariables": true,
    "prefix": ""
  },
  "aliases": {
    "components": "@/components",
    "utils": "@/lib/utils",
    "ui": "@/components/ui",
    "lib": "@/lib",
    "hooks": "@/hooks"
  },
  "iconLibrary": "lucide"
}
================================================
FILE: frontend/eslint.config.js
================================================
import js from '@eslint/js'
import globals from 'globals'
import reactHooks from 'eslint-plugin-react-hooks'
import reactRefresh from 'eslint-plugin-react-refresh'
import tseslint from 'typescript-eslint'
// Paths excluded from linting altogether.
const ignoredPaths = { ignores: ['dist'] }

// Lint settings applied to every .ts / .tsx file.
const typescriptReactConfig = {
  // Start from the recommended JS and typescript-eslint rule sets.
  extends: [js.configs.recommended, ...tseslint.configs.recommended],
  files: ['**/*.{ts,tsx}'],
  languageOptions: { ecmaVersion: 2020, globals: globals.browser },
  plugins: { 'react-hooks': reactHooks, 'react-refresh': reactRefresh },
  rules: {
    ...reactHooks.configs.recommended.rules,
    // Warn (not error) on non-component exports; constant exports are allowed.
    'react-refresh/only-export-components': ['warn', { allowConstantExport: true }],
  },
}

export default tseslint.config(ignoredPaths, typescriptReactConfig)
================================================
FILE: frontend/index.html
================================================
Vite + React + TS
================================================
FILE: frontend/package.json
================================================
{
"name": "frontend",
"private": true,
"version": "0.0.0",
"type": "module",
"scripts": {
"dev": "vite",
"build": "tsc -b && vite build",
"lint": "eslint .",
"preview": "vite preview"
},
"dependencies": {
"@langchain/core": "^0.3.55",
"@langchain/langgraph-sdk": "^0.0.74",
"@radix-ui/react-scroll-area": "^1.2.8",
"@radix-ui/react-select": "^2.2.4",
"@radix-ui/react-slot": "^1.2.2",
"@radix-ui/react-tabs": "^1.1.11",
"@radix-ui/react-tooltip": "^1.2.6",
"@tailwindcss/vite": "^4.1.5",
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"lucide-react": "^0.508.0",
"react": "^19.0.0",
"react-dom": "^19.0.0",
"react-markdown": "^9.0.3",
"react-router-dom": "^7.5.3",
"tailwind-merge": "^3.2.0",
"tailwindcss": "^4.1.5"
},
"devDependencies": {
"@eslint/js": "^9.22.0",
"@types/node": "^22.15.17",
"@types/react": "^19.1.2",
"@types/react-dom": "^19.1.3",
"@vitejs/plugin-react-swc": "^3.9.0",
"eslint": "^9.22.0",
"eslint-plugin-react-hooks": "^5.2.0",
"eslint-plugin-react-refresh": "^0.4.19",
"globals": "^16.0.0",
"tw-animate-css": "^1.2.9",
"typescript": "~5.7.2",
"typescript-eslint": "^8.26.1",
"vite": "^6.3.4"
}
}
================================================
FILE: frontend/src/App.tsx
================================================
import { useStream } from "@langchain/langgraph-sdk/react";
import type { Message } from "@langchain/langgraph-sdk";
import { useState, useEffect, useRef, useCallback } from "react";
import { ProcessedEvent } from "@/components/ActivityTimeline";
import { WelcomeScreen } from "@/components/WelcomeScreen";
import { ChatMessagesView } from "@/components/ChatMessagesView";
import { transformEventsToHierarchy, debugTransformResult, EventData } from "@/utils/dataTransformer";
// Type definitions
/**
 * Loosely typed update event passed to the `onUpdateEvent` stream callback.
 *
 * The handler looks up keys such as `generate_query`, `web_research` or
 * `finalize_answer` on the event. Every value is `unknown`, so a payload
 * must be cast to a concrete shape before its fields are read.
 */
interface StreamEvent {
[key: string]: unknown;
}
/**
 * Shape the frontend assumes for one entry of `sources_gathered` in a
 * `web_research` event. Every field is optional.
 */
interface SourceData {
// NOTE(review): presumably the source page title — confirm against the backend payload.
title?: string;
// NOTE(review): presumably the source link — confirm against the backend payload.
url?: string;
// Read by the timeline handler to build the "Related to: ..." summary of unique labels.
label?: string;
// NOTE(review): presumably an excerpt of the source text — confirm against the backend payload.
snippet?: string;
}
export default function App() {
const [processedEventsTimeline, setProcessedEventsTimeline] = useState<
ProcessedEvent[]
>([]);
const [historicalActivities, setHistoricalActivities] = useState<
Record
>({});
const scrollAreaRef = useRef(null);
const hasFinalizeEventOccurredRef = useRef(false);
const thread = useStream<{
messages: Message[];
initial_search_query_count: number;
max_research_loops: number;
reasoning_model: string;
}>({
apiUrl: import.meta.env.DEV
? "http://localhost:2024"
: "http://localhost:8123",
assistantId: "agent",
messagesKey: "messages",
onFinish: (state) => {
console.log(state);
},
onUpdateEvent: (event: StreamEvent) => {
// 🐛 DEBUG: 完整事件日志
console.log("📨 收到事件:", event);
console.log("📊 事件结构分析:", {
eventKeys: Object.keys(event),
eventType: typeof event,
hasGenerateQuery: !!event.generate_query,
hasWebResearch: !!event.web_research,
hasReflection: !!event.reflection,
hasPlanner: !!(event.planner_node || event.planner),
hasContentEnhancement: !!event.content_enhancement_analysis,
hasEvaluateResearch: !!event.evaluate_research_enhanced,
hasFinalizeAnswer: !!event.finalize_answer,
hasRecordTaskCompletion: !!event.record_task_completion,
allEventKeys: Object.keys(event).join(", ")
});
// 🔧 NEW: 收集事件用于转换器测试 - 现在使用静态收集而不是状态
const allEvents = JSON.parse(sessionStorage.getItem('research_events') || '[]') as EventData[];
allEvents.push(event as EventData);
sessionStorage.setItem('research_events', JSON.stringify(allEvents));
// 每5个事件测试一次转换器(避免过于频繁)
if (allEvents.length % 5 === 0) {
try {
const transformedData = transformEventsToHierarchy(allEvents, thread.messages || []);
console.log("🔍 数据转换器测试结果:");
debugTransformResult(transformedData);
} catch (error) {
console.warn("⚠️ 数据转换器测试失败:", error);
}
}
let processedEvent: ProcessedEvent | null = null;
let eventProcessed = false;
if (event.generate_query) {
const queryData = event.generate_query as { query_list?: string[] };
processedEvent = {
title: "Generating Search Queries",
data: queryData.query_list?.join(", ") || "No queries",
};
eventProcessed = true;
} else if (event.web_research) {
// 🐛 DEBUG: 详细记录web_research事件结构
console.log("🔍 Web Research 事件详细信息:", event.web_research);
const researchData = event.web_research as { sources_gathered?: SourceData[] };
const sources = researchData.sources_gathered || [];
const numSources = sources.length;
// 🐛 DEBUG: 记录来源结构
if (sources.length > 0) {
console.log("📊 第一个来源的结构:", sources[0]);
console.log("📊 所有来源的keys:", sources.map(s => Object.keys(s)));
}
const uniqueLabels = [
...new Set(sources.map((s: SourceData) => s.label).filter(Boolean)),
];
const exampleLabels = uniqueLabels.slice(0, 3).join(", ");
processedEvent = {
title: "Web Research",
data: `Gathered ${numSources} sources. Related to: ${
exampleLabels || "N/A"
}.`,
};
eventProcessed = true;
} else if (event.reflection) {
// 🐛 DEBUG: 详细记录reflection事件结构
console.log("🤔 Reflection 事件详细信息:", event.reflection);
const reflectionData = event.reflection as {
reflection_is_sufficient?: boolean;
reflection_follow_up_queries?: string[];
};
processedEvent = {
title: "Reflection",
data: reflectionData.reflection_is_sufficient
? "Search successful, generating final answer."
: `Need more information, searching for ${(reflectionData.reflection_follow_up_queries || []).join(
", "
)}`,
};
eventProcessed = true;
} else if (event.planner_node || event.planner) {
const plannerData = (event.planner_node || event.planner) as { plan?: unknown[] };
processedEvent = {
title: "Planning Research Strategy",
data: plannerData.plan
? `Generated ${plannerData.plan.length} research tasks`
: "Analyzing research requirements...",
};
eventProcessed = true;
} else if (event.content_enhancement_analysis) {
const enhancementData = event.content_enhancement_analysis as {
needs_enhancement?: boolean;
reasoning?: string;
};
processedEvent = {
title: "Content Enhancement Analysis",
data: enhancementData.needs_enhancement
? `Enhancement needed: ${enhancementData.reasoning || 'Analyzing content quality'}`
: "Content quality sufficient, proceeding with report generation",
};
eventProcessed = true;
} else if (event.evaluate_research_enhanced) {
const evaluationData = event.evaluate_research_enhanced as {
evaluation_is_sufficient?: boolean;
};
processedEvent = {
title: "Research Quality Evaluation",
data: evaluationData.evaluation_is_sufficient
? "Research meets quality standards"
: "Additional research required",
};
eventProcessed = true;
} else if (event.content_enhancement) {
// 🐛 DEBUG: 详细记录content enhancement事件结构
console.log("🔧 Content Enhancement 事件详细信息:", event.content_enhancement);
const enhancementData = event.content_enhancement as {
enhancement_status?: string;
};
const enhancementStatus = enhancementData.enhancement_status || "unknown";
const statusMessages: Record = {
"skipped": "Content enhancement skipped - quality sufficient",
"completed": "Content enhancement completed successfully",
"failed": "Content enhancement failed",
"error": "Content enhancement encountered errors",
"analyzing": "Analyzing content enhancement needs",
"skipped_no_api": "Content enhancement skipped - no API key"
};
processedEvent = {
title: "Content Enhancement Analysis",
data: statusMessages[enhancementStatus] || `Status: ${enhancementStatus}`,
};
eventProcessed = true;
} else if (event.record_task_completion) {
const completionData = event.record_task_completion as {
next_node_decision?: string;
ledger?: Array<{ description?: string }>;
};
const nextDecision = completionData.next_node_decision || "continue";
const ledger = completionData.ledger || [];
const completedTask = ledger.length > 0 ? ledger[0].description : "Unknown task";
processedEvent = {
title: "Task Completion Recorded",
data: nextDecision === "end"
? `All tasks completed. Final task: ${completedTask}`
: `Task completed: ${completedTask}. Moving to next task.`,
};
eventProcessed = true;
} else if (event.finalize_answer) {
processedEvent = {
title: "Finalizing Answer",
data: "Composing and presenting the final answer.",
};
hasFinalizeEventOccurredRef.current = true;
eventProcessed = true;
}
// 🐛 DEBUG: 检查是否有未处理的事件
if (!eventProcessed) {
console.warn("⚠️ 未处理的事件类型:", {
eventKeys: Object.keys(event),
eventData: event,
possibleMissingHandlers: [
"record_task_completion",
"content_enhancement",
"should_enhance_content",
"decide_next_research_step",
"decide_next_step_in_plan"
]
});
} else {
console.log("✅ 事件已处理:", processedEvent?.title);
// 🔧 NEW: 在任何关键事件处理后都尝试保存快照
if (processedEvent?.title === "Reflection" ||
processedEvent?.title === "Content Enhancement Analysis" ||
processedEvent?.title === "Research Quality Evaluation") {
console.log(`🎯 检测到关键事件,准备保存快照: ${processedEvent.title}`);
saveCurrentStateSnapshot(processedEvent.title);
}
}
if (processedEvent) {
console.log(`➕ 添加新事件到时间线: ${processedEvent.title}`);
setProcessedEventsTimeline((prevEvents) => {
const newEvents = [...prevEvents, processedEvent!];
console.log(`📋 更新后的事件时间线 (${newEvents.length}):`, newEvents.map(e => e.title));
return newEvents;
});
}
},
});
useEffect(() => {
if (scrollAreaRef.current) {
const scrollViewport = scrollAreaRef.current.querySelector(
"[data-radix-scroll-area-viewport]"
);
if (scrollViewport) {
scrollViewport.scrollTop = scrollViewport.scrollHeight;
}
}
}, [thread.messages]);
useEffect(() => {
if (
hasFinalizeEventOccurredRef.current &&
!thread.isLoading &&
thread.messages.length > 0
) {
const lastMessage = thread.messages[thread.messages.length - 1];
if (lastMessage && lastMessage.type === "ai" && lastMessage.id) {
setHistoricalActivities((prev) => ({
...prev,
[lastMessage.id!]: [...processedEventsTimeline],
}));
}
hasFinalizeEventOccurredRef.current = false;
}
}, [thread.messages, thread.isLoading, processedEventsTimeline]);
const handleSubmit = useCallback(
(submittedInputValue: string, effort: string, model: string) => {
if (!submittedInputValue.trim()) return;
setProcessedEventsTimeline([]);
hasFinalizeEventOccurredRef.current = false;
// 清空事件存储
sessionStorage.removeItem('research_events');
// convert effort to, initial_search_query_count and max_research_loops
// low means max 1 loop and 1 query
// medium means max 3 loops and 3 queries
// high means max 10 loops and 5 queries
let initial_search_query_count = 0;
let max_research_loops = 0;
switch (effort) {
case "low":
initial_search_query_count = 1;
max_research_loops = 1;
break;
case "medium":
initial_search_query_count = 3;
max_research_loops = 3;
break;
case "high":
initial_search_query_count = 5;
max_research_loops = 10;
break;
}
const newMessages: Message[] = [
...(thread.messages || []),
{
type: "human",
content: submittedInputValue,
id: Date.now().toString(),
},
];
thread.submit({
messages: newMessages,
initial_search_query_count: initial_search_query_count,
max_research_loops: max_research_loops,
reasoning_model: model,
});
},
[thread]
);
const handleCancel = useCallback(() => {
thread.stop();
window.location.reload();
}, [thread]);
// 新增:保存中间状态快照的函数
const saveCurrentStateSnapshot = useCallback((stateName: string) => {
console.log(`📸 保存状态快照: ${stateName}`);
console.log(`📊 当前消息数量: ${thread.messages?.length || 0}`);
console.log(`📊 当前时间线事件数: ${processedEventsTimeline.length}`);
// 增加延迟时间,确保AI消息已创建
setTimeout(() => {
console.log(`⏰ 延迟后检查消息: ${thread.messages?.length || 0}`);
if (thread.messages && thread.messages.length > 0) {
const lastMessage = thread.messages[thread.messages.length - 1];
console.log(`📋 最后一条消息:`, {
id: lastMessage.id,
type: lastMessage.type,
contentLength: typeof lastMessage.content === 'string' ? lastMessage.content.length : 'non-string'
});
if (lastMessage && lastMessage.type === "ai" && lastMessage.id) {
// 创建当前时间线的快照
const snapshot = [...processedEventsTimeline];
console.log(`📷 为消息 ${lastMessage.id} 保存快照 (${snapshot.length} 事件):`, snapshot.map(e => e.title));
setHistoricalActivities((prev) => {
const newActivities = {
...prev,
[lastMessage.id!]: snapshot,
};
console.log(`✅ 快照已保存,历史活动数:`, Object.keys(newActivities).length);
return newActivities;
});
} else {
console.warn(`⚠️ 无法保存快照 ${stateName}: 最后一条消息不是AI消息`);
}
} else {
console.warn(`⚠️ 无法保存快照 ${stateName}: 没有消息`);
}
}, 300); // 增加延迟到300ms
}, [thread.messages, processedEventsTimeline]);
return (