Showing preview only (2,897K chars total). Download the full file or copy to clipboard to get everything.
Repository: DemonDamon/Listed-company-news-crawl-and-text-analysis
Branch: main
Commit: d7a20a1f7ee8
Files: 293
Total size: 2.5 MB
Directory structure:
gitextract_w0u594fz/
├── .deepsource.toml
├── .gitignore
├── LICENSE
├── README.md
├── README_zn.md
├── backend/
│ ├── .gitignore
│ ├── README.md
│ ├── README_zn.md
│ ├── add_raw_html_column.py
│ ├── app/
│ │ ├── __init__.py
│ │ ├── agents/
│ │ │ ├── __init__.py
│ │ │ ├── data_collector.py
│ │ │ ├── data_collector_v2.py
│ │ │ ├── debate_agents.py
│ │ │ ├── news_analyst.py
│ │ │ ├── orchestrator.py
│ │ │ ├── quantitative_agent.py
│ │ │ └── search_analyst.py
│ │ ├── alpha_mining/
│ │ │ ├── README.md
│ │ │ ├── __init__.py
│ │ │ ├── backtest/
│ │ │ │ ├── __init__.py
│ │ │ │ └── evaluator.py
│ │ │ ├── config.py
│ │ │ ├── dsl/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── ops.py
│ │ │ │ └── vocab.py
│ │ │ ├── features/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── market.py
│ │ │ │ └── sentiment.py
│ │ │ ├── model/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── alpha_generator.py
│ │ │ │ └── trainer.py
│ │ │ ├── tools/
│ │ │ │ ├── __init__.py
│ │ │ │ └── alpha_mining_tool.py
│ │ │ ├── utils.py
│ │ │ └── vm/
│ │ │ ├── __init__.py
│ │ │ └── factor_vm.py
│ │ ├── api/
│ │ │ ├── __init__.py
│ │ │ └── v1/
│ │ │ ├── __init__.py
│ │ │ ├── agents.py
│ │ │ ├── alpha_mining.py
│ │ │ ├── analysis.py
│ │ │ ├── debug.py
│ │ │ ├── knowledge_graph.py
│ │ │ ├── llm_config.py
│ │ │ ├── news.py
│ │ │ ├── news_v2.py
│ │ │ ├── stocks.py
│ │ │ └── tasks.py
│ │ ├── config/
│ │ │ ├── __init__.py
│ │ │ └── debate_modes.yaml
│ │ ├── core/
│ │ │ ├── __init__.py
│ │ │ ├── celery_app.py
│ │ │ ├── config.py
│ │ │ ├── database.py
│ │ │ ├── neo4j_client.py
│ │ │ └── redis_client.py
│ │ ├── financial/
│ │ │ ├── __init__.py
│ │ │ ├── models/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── news.py
│ │ │ │ └── stock.py
│ │ │ ├── providers/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base.py
│ │ │ │ ├── eastmoney/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── fetchers/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ └── news.py
│ │ │ │ │ └── provider.py
│ │ │ │ ├── nbd/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── fetchers/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ └── news.py
│ │ │ │ │ └── provider.py
│ │ │ │ ├── netease/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── fetchers/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ └── news.py
│ │ │ │ │ └── provider.py
│ │ │ │ ├── sina/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── fetchers/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ └── news.py
│ │ │ │ │ └── provider.py
│ │ │ │ ├── tencent/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── fetchers/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ └── news.py
│ │ │ │ │ └── provider.py
│ │ │ │ └── yicai/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── fetchers/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── news.py
│ │ │ │ └── provider.py
│ │ │ ├── registry.py
│ │ │ └── tools.py
│ │ ├── knowledge/
│ │ │ ├── README.md
│ │ │ ├── __init__.py
│ │ │ ├── graph_models.py
│ │ │ ├── graph_service.py
│ │ │ ├── knowledge_extractor.py
│ │ │ └── parallel_search.py
│ │ ├── main.py
│ │ ├── models/
│ │ │ ├── __init__.py
│ │ │ ├── analysis.py
│ │ │ ├── crawl_task.py
│ │ │ ├── database.py
│ │ │ ├── debate_history.py
│ │ │ ├── news.py
│ │ │ └── stock.py
│ │ ├── scripts/
│ │ │ └── init_stocks.py
│ │ ├── services/
│ │ │ ├── __init__.py
│ │ │ ├── analysis_service.py
│ │ │ ├── embedding_service.py
│ │ │ ├── llm_service.py
│ │ │ └── stock_data_service.py
│ │ ├── storage/
│ │ │ ├── __init__.py
│ │ │ └── vector_storage.py
│ │ ├── tasks/
│ │ │ ├── __init__.py
│ │ │ └── crawl_tasks.py
│ │ └── tools/
│ │ ├── __init__.py
│ │ ├── bochaai_search.py
│ │ ├── caijing_crawler.py
│ │ ├── crawler_base.py
│ │ ├── crawler_enhanced.py
│ │ ├── dynamic_crawler_example.py
│ │ ├── eastmoney_crawler.py
│ │ ├── eeo_crawler.py
│ │ ├── interactive_crawler.py
│ │ ├── jingji21_crawler.py
│ │ ├── jwview_crawler.py
│ │ ├── nbd_crawler.py
│ │ ├── netease163_crawler.py
│ │ ├── search_engine_crawler.py
│ │ ├── sina_crawler.py
│ │ ├── tencent_crawler.py
│ │ ├── text_cleaner.py
│ │ └── yicai_crawler.py
│ ├── clear_news_data.py
│ ├── env.example
│ ├── init_db.py
│ ├── init_knowledge_graph.py
│ ├── requirements.txt
│ ├── reset_database.py
│ ├── setup_env.sh
│ ├── start.sh
│ ├── start_celery.sh
│ └── tests/
│ ├── __init__.py
│ ├── check_milvus_data.py
│ ├── check_news_embedding_status.py
│ ├── financial/
│ │ ├── __init__.py
│ │ ├── test_smoke_openbb_models.py
│ │ ├── test_smoke_openbb_provider.py
│ │ └── test_smoke_openbb_tools.py
│ ├── manual_vectorize.py
│ ├── test_alpha_mining/
│ │ ├── __init__.py
│ │ ├── test_integration_p2.py
│ │ ├── test_smoke_p0.py
│ │ └── test_smoke_p1.py
│ └── test_smoke_alpha_mining.py
├── deploy/
│ ├── Dockerfile.celery
│ ├── celery-entrypoint.sh
│ └── docker-compose.dev.yml
├── docs/
│ ├── BochaAI_Web_Search_API_20251222_121535.md
│ └── 天眼查MCP服务_20260104_171528.md
├── frontend/
│ ├── .gitignore
│ ├── QUICKSTART.md
│ ├── README.md
│ ├── index.html
│ ├── package.json
│ ├── postcss.config.js
│ ├── src/
│ │ ├── App.tsx
│ │ ├── components/
│ │ │ ├── DebateChatRoom.tsx
│ │ │ ├── DebateConfig.tsx
│ │ │ ├── DebateHistorySidebar.tsx
│ │ │ ├── HighlightText.tsx
│ │ │ ├── KLineChart.tsx
│ │ │ ├── MentionInput.tsx
│ │ │ ├── ModelSelector.tsx
│ │ │ ├── NewsDetailDrawer.tsx
│ │ │ ├── StockSearch.tsx
│ │ │ ├── alpha-mining/
│ │ │ │ ├── AgentDemo.tsx
│ │ │ │ ├── MetricsDashboard.tsx
│ │ │ │ ├── OperatorGrid.tsx
│ │ │ │ ├── SentimentCompare.tsx
│ │ │ │ ├── TrainingMonitor.tsx
│ │ │ │ └── index.ts
│ │ │ └── ui/
│ │ │ ├── badge.tsx
│ │ │ ├── button.tsx
│ │ │ ├── card.tsx
│ │ │ ├── dropdown-menu.tsx
│ │ │ ├── sheet.tsx
│ │ │ └── tabs.tsx
│ │ ├── context/
│ │ │ └── NewsToolbarContext.tsx
│ │ ├── hooks/
│ │ │ └── useDebounce.ts
│ │ ├── index.css
│ │ ├── layout/
│ │ │ └── MainLayout.tsx
│ │ ├── lib/
│ │ │ ├── api-client.ts
│ │ │ └── utils.ts
│ │ ├── main.tsx
│ │ ├── pages/
│ │ │ ├── AgentMonitorPage.tsx
│ │ │ ├── AlphaMiningPage.tsx
│ │ │ ├── Dashboard.tsx
│ │ │ ├── NewsListPage.tsx
│ │ │ ├── StockAnalysisPage.tsx
│ │ │ ├── StockSearchPage.tsx
│ │ │ └── TaskManagerPage.tsx
│ │ ├── store/
│ │ │ ├── useDebateStore.ts
│ │ │ ├── useLanguageStore.ts
│ │ │ ├── useNewsStore.ts
│ │ │ └── useTaskStore.ts
│ │ └── types/
│ │ └── api.ts
│ ├── tailwind.config.js
│ ├── tsconfig.json
│ ├── tsconfig.node.json
│ └── vite.config.ts
├── legacy_v1/
│ ├── .deepsource.toml
│ ├── Chinese_Stop_Words.txt
│ ├── Crawler/
│ │ ├── __init__.py
│ │ ├── crawler_cnstock.py
│ │ ├── crawler_jrj.py
│ │ ├── crawler_nbd.py
│ │ ├── crawler_sina.py
│ │ ├── crawler_stcn.py
│ │ └── crawler_tushare.py
│ ├── README_OLD.md
│ ├── Text_Analysis/
│ │ ├── __init__.py
│ │ ├── text_mining.py
│ │ └── text_processing.py
│ ├── finance_dict.txt
│ ├── run_crawler_cnstock.py
│ ├── run_crawler_jrj.py
│ ├── run_crawler_nbd.py
│ ├── run_crawler_sina.py
│ ├── run_crawler_stcn.py
│ ├── run_crawler_tushare.py
│ ├── run_main.py
│ └── src/
│ ├── Gon/
│ │ ├── __init__.py
│ │ ├── cnstockspyder.py
│ │ ├── history_starter_cnstock.py
│ │ ├── history_starter_jrj.py
│ │ ├── history_starter_nbd.py
│ │ ├── history_starter_stock_price.py
│ │ ├── ifengspyder.py
│ │ ├── jrjspyder.py
│ │ ├── kill_realtime_spyder_tasks.py
│ │ ├── money163spyder.py
│ │ ├── nbdspyder.py
│ │ ├── realtime_starter_cnstock.py
│ │ ├── realtime_starter_jrj.py
│ │ ├── realtime_starter_nbd.py
│ │ ├── realtime_starter_redis_queue.py
│ │ ├── realtime_starter_stock_price.py
│ │ ├── sinaspyder.py
│ │ ├── spyder.py
│ │ └── stockinfospyder.py
│ ├── Hisoka/
│ │ └── classifier.py
│ ├── Killua/
│ │ ├── __init__.py
│ │ ├── buildstocknewsdb.py
│ │ ├── deduplication.py
│ │ └── denull.py
│ ├── Kite/
│ │ ├── __init__.py
│ │ ├── config.py
│ │ ├── database.py
│ │ ├── log.py
│ │ ├── utils.py
│ │ └── webserver.py
│ ├── Leorio/
│ │ ├── __init__.py
│ │ ├── chnstopwords.txt
│ │ ├── financedict.txt
│ │ ├── tokenization.py
│ │ └── topicmodelling.py
│ ├── __init__.py
│ ├── history_spyder_startup.bat
│ ├── main.py
│ ├── realtime_spyder_startup.bat
│ └── realtime_spyder_stopall.bat
├── reset_all_data.sh
└── thirdparty/
├── DISC-FinLLM.md
├── ElegantRL.md
├── FinCast-fts.md
├── FinGPT.md
├── FinGenius.md
├── FinRL-Meta.md
├── FinRL.md
├── FinRobot.md
├── FinceptTerminal.md
├── Kronos.md
├── Lean.md
├── README.md
├── TradingAgents-CN.md
├── TradingAgents.md
├── TrendRadar.md
├── agentic-trading.md
├── awesome-quant.md
├── backtrader.md
├── investor-agent.md
├── panda_quantflow.md
├── qlib.md
└── vnpy.md
================================================
FILE CONTENTS
================================================
================================================
FILE: .deepsource.toml
================================================
version = 1
[[analyzers]]
name = "python"
[analyzers.meta]
runtime_version = "3.x.x"
================================================
FILE: .gitignore
================================================
# Development documentation (local only, not for Git)
devlogs/
conclusions/
researches/
# Python
__pycache__/
*.py[cod]
*$py.class
# Virtual environments
venv/
env/
ENV/
# IDE
.vscode/
.idea/
*.swp
# OS
.DS_Store
node_modules/
**/node_modules/
backend/celerybeat-schedule*
backend/.crawl_cache/
backend/celerybeat-schedule
backend/reproduce_sina.py
backend/checkpoints/
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2025 Ziran Li
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: README.md
================================================
# FinnewsHunter: Multi-Agent Investment Decision Platform Driven by Financial News
<div align="right">
<a href="README_zn.md">中文版</a> | <a href="README.md">English</a>
</div>
<div align="center">
<img src="assets/images/FINNEWS_HUNTER_LOGO.png" alt="FinnewsHunter Logo" width="450">
</div>
An enterprise-grade financial news analysis system built on the [AgenticX](https://github.com/DemonDamon/AgenticX) framework, integrating real-time news streams, deep quantitative analysis, and multi-agent debate mechanisms.
FinnewsHunter goes beyond traditional text classification by deploying multi-agent teams (NewsAnalyst, Researcher, etc.) to monitor multiple financial news sources in real-time, including Sina Finance, National Business Daily, Financial World, Securities Times, and more. It leverages large language models for deep interpretation, sentiment analysis, and market impact assessment, combined with knowledge graphs to mine potential investment opportunities and risks, providing decision-level alpha signals for quantitative trading.
---
## 🎯 Project Features
- ✅ **AgenticX Native**: Deeply integrated with AgenticX framework, using core abstractions like Agent, Tool, and Workflow
- ✅ **AgenticX Component Integration**: Direct use of AgenticX's `BailianEmbeddingProvider` and `MilvusStorage`, avoiding reinventing the wheel
- ✅ **Agent-Driven**: NewsAnalyst agent automatically analyzes news sentiment and market impact
- ✅ **Multi-Provider LLM Support**: Supports 5 major LLM providers (Bailian, OpenAI, DeepSeek, Kimi, Zhipu), switchable with one click in the frontend
- ✅ **Batch Operations**: Supports batch selection, batch deletion, and batch analysis of news, improving operational efficiency
- ✅ **Stock K-Line Analysis**: Integrated with akshare real market data, supporting daily/minute K-line multi-period display
- ✅ **Intelligent Stock Search**: Supports fuzzy queries by stock code and name, pre-loaded with data for 5,000+ A-share stocks
- ✅ **Complete Tech Stack**: FastAPI + PostgreSQL + Milvus + Redis + React
- ✅ **Real-time Search**: Supports multi-dimensional search by title, content, stock code, with keyword highlighting
- ✅ **Async Vectorization**: Background async vectorization execution, non-blocking analysis flow
- ✅ **Production Ready**: One-click deployment with Docker Compose, complete logging and monitoring
---
## 🏗️ System Architecture

The system adopts a layered architecture design:
- **M6 Frontend Interaction Layer**: React + TypeScript + Shadcn UI
- **M1 Platform Service Layer**: FastAPI Gateway + Task Manager
- **M4/M5 Agent Collaboration Layer**: AgenticX Agent + Debate Workflow
- **M2/M3 Infrastructure Layer**: Crawler Service + LLM Service + Embedding
- **M7-M11 Storage & Learning Layer**: PostgreSQL + Milvus + Redis + ACE Framework
---
## 🚀 Quick Start
### Prerequisites
- Python 3.11+
- Docker & Docker Compose
- (Optional) OpenAI API Key or local LLM
- Node.js 18+ (for frontend development)
### 1. Install AgenticX
```bash
cd /path/to/AgenticX  # clone from https://github.com/DemonDamon/AgenticX first if needed
pip install -e .
```
### 2. Install Backend Dependencies
```bash
cd FinnewsHunter/backend
pip install -r requirements.txt
```
### 3. Configure Environment Variables
```bash
cd FinnewsHunter/backend
cp env.example .env
# Edit .env file and fill in LLM API Key and other configurations
```
**Multi-Provider LLM Configuration:**
The system supports 5 LLM providers, at least one needs to be configured:
| Provider | Environment Variable | Registration URL |
|----------|---------------------|------------------|
| Bailian (Alibaba Cloud) | `DASHSCOPE_API_KEY` | https://dashscope.console.aliyun.com/ |
| OpenAI | `OPENAI_API_KEY` | https://platform.openai.com/api-keys |
| DeepSeek | `DEEPSEEK_API_KEY` | https://platform.deepseek.com/ |
| Kimi (Moonshot) | `MOONSHOT_API_KEY` | https://platform.moonshot.cn/ |
| Zhipu | `ZHIPU_API_KEY` | https://open.bigmodel.cn/ |
**Example Configuration (Recommended: Bailian):**
```bash
# Bailian (Alibaba Cloud) - Recommended, fast access in China
DASHSCOPE_API_KEY=sk-your-dashscope-key
DASHSCOPE_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
BAILIAN_MODELS=qwen-plus,qwen-max,qwen-turbo
# Optional: Other providers
OPENAI_API_KEY=sk-your-openai-key
DEEPSEEK_API_KEY=sk-your-deepseek-key
```
### 4. Start Base Services (PostgreSQL, Redis, Milvus)
```bash
cd FinnewsHunter
docker compose -f deploy/docker-compose.dev.yml up -d postgres redis milvus-etcd milvus-minio milvus-standalone
```
### 5. Initialize Database
```bash
cd FinnewsHunter/backend
python init_db.py
```
### 5.1 Initialize Stock Data (Optional, for stock search functionality)
```bash
cd FinnewsHunter/backend
python -m app.scripts.init_stocks
# Will fetch all A-share data (approximately 5000+ stocks) from akshare and save to database
```
### 6. Start Backend API Service
```bash
cd FinnewsHunter/backend
uvicorn app.main:app --reload --host 0.0.0.0 --port 8000
```
### 7. Start Celery Worker and Beat (Auto Crawling)
```bash
# Open a new terminal
cd FinnewsHunter
docker compose -f deploy/docker-compose.dev.yml up -d celery-worker celery-beat
```
### 8. Start Frontend Service
```bash
# Open a new terminal
cd FinnewsHunter/frontend
npm install # First time requires dependency installation
npm run dev
```
### 9. Access Application
- **Frontend Interface**: http://localhost:3000
- **Backend API**: http://localhost:8000
- **API Documentation**: http://localhost:8000/docs
---
## 🔄 Service Management
### View All Service Status
```bash
cd FinnewsHunter
docker compose -f deploy/docker-compose.dev.yml ps
```
### Restart All Services
```bash
cd FinnewsHunter
# Restart Docker services (infrastructure + Celery)
docker compose -f deploy/docker-compose.dev.yml restart
# If backend API is started independently, manually restart it
# Press Ctrl+C to stop backend process, then rerun:
cd backend
uvicorn app.main:app --reload --host 0.0.0.0 --port 8000
```
### Restart Specific Service
```bash
cd FinnewsHunter
# Restart only Celery (after code changes)
docker compose -f deploy/docker-compose.dev.yml restart celery-worker celery-beat
# Restart only database
docker compose -f deploy/docker-compose.dev.yml restart postgres
# Restart only Redis
docker compose -f deploy/docker-compose.dev.yml restart redis
```
### Stop All Services
```bash
cd FinnewsHunter
docker compose -f deploy/docker-compose.dev.yml down
```
### View Logs
```bash
cd FinnewsHunter
# View Celery Worker logs
docker compose -f deploy/docker-compose.dev.yml logs -f celery-worker
# View Celery Beat logs (scheduled task dispatch)
docker compose -f deploy/docker-compose.dev.yml logs -f celery-beat
# View PostgreSQL logs
docker compose -f deploy/docker-compose.dev.yml logs -f postgres
# View all service logs
docker compose -f deploy/docker-compose.dev.yml logs -f
```
---
## 🗑️ Reset Database
### Method 1: Use One-Click Reset Script (Recommended) ⭐
```bash
cd FinnewsHunter
# Execute reset script
./reset_all_data.sh
# Enter yes to confirm
```
**The script will automatically complete:**
1. ✅ Clear all news and task data in PostgreSQL
2. ✅ Clear Redis cache
3. ✅ Reset database auto-increment IDs (restart from 1)
4. ✅ Clear Celery schedule files
5. ✅ Automatically restart Celery services
**After execution, wait:**
- 5-10 minutes for the system to automatically re-crawl data
- Access frontend to view new data
---
### Method 2: Manual Reset (Advanced)
#### Step 1: Clear PostgreSQL Data
```bash
# Enter PostgreSQL container
docker exec -it finnews_postgres psql -U finnews -d finnews_db
```
Execute in PostgreSQL command line:
```sql
-- Clear news table
DELETE FROM news;
-- Clear task table
DELETE FROM crawl_tasks;
-- Clear analysis table
DELETE FROM analyses;
-- Reset auto-increment IDs
ALTER SEQUENCE news_id_seq RESTART WITH 1;
ALTER SEQUENCE crawl_tasks_id_seq RESTART WITH 1;
ALTER SEQUENCE analyses_id_seq RESTART WITH 1;
-- Verify results (should all be 0)
SELECT 'news table', COUNT(*) FROM news;
SELECT 'crawl_tasks table', COUNT(*) FROM crawl_tasks;
SELECT 'analyses table', COUNT(*) FROM analyses;
-- Exit
\q
```
#### Step 2: Clear Redis Cache
```bash
cd FinnewsHunter
docker exec finnews_redis redis-cli FLUSHDB
```
#### Step 3: Clear Celery Schedule Files
```bash
cd FinnewsHunter/backend
rm -f celerybeat-schedule*
```
#### Step 4: Restart Celery Services
```bash
cd FinnewsHunter
docker compose -f deploy/docker-compose.dev.yml restart celery-worker celery-beat
```
#### Step 5: Verify Data Cleared
```bash
# Check news count (should be 0)
docker exec finnews_postgres psql -U finnews -d finnews_db -c "SELECT COUNT(*) FROM news;"
# Check Redis (should be 0 or very small)
docker exec finnews_redis redis-cli DBSIZE
# Check if Celery has started crawling
docker compose -f deploy/docker-compose.dev.yml logs -f celery-beat
# Should see 10 crawl tasks triggered per minute
```
---
### Method 3: Use Python Script Reset
```bash
cd FinnewsHunter/backend
python reset_database.py
# Enter yes to confirm
```
---
### Method 4: Quick Manual Cleanup (One-Line Commands) 🔥
**Use Case:** When reset script doesn't work, this is the fastest method
```bash
cd FinnewsHunter
# Step 1: Clear database tables
docker exec finnews_postgres psql -U finnews -d finnews_db -c "DELETE FROM news; DELETE FROM crawl_tasks; DELETE FROM analyses;"
# Step 2: Reset auto-increment IDs
docker exec finnews_postgres psql -U finnews -d finnews_db -c "ALTER SEQUENCE news_id_seq RESTART WITH 1; ALTER SEQUENCE crawl_tasks_id_seq RESTART WITH 1; ALTER SEQUENCE analyses_id_seq RESTART WITH 1;"
# Step 3: Clear Redis cache
docker exec finnews_redis redis-cli FLUSHDB
# Step 4: Clear Celery schedule files
rm -f backend/celerybeat-schedule*
# Step 5: Restart Celery services
docker compose -f deploy/docker-compose.dev.yml restart celery-worker celery-beat
# Step 6: Verify cleared (should display 0)
docker exec finnews_postgres psql -U finnews -d finnews_db -c "SELECT COUNT(*) FROM news;"
```
**Immediately refresh browser after execution:**
- Mac: `Command + Shift + R`
- Windows: `Ctrl + Shift + R`
---
### 🖥️ Clear Frontend Cache (Important!)
**After data is cleared, frontend may still display old data due to browser cache.**
#### Method 1: Hard Refresh Browser (Recommended) ⭐
**Mac System:**
```
Press Command + Shift + R
or Command + Option + R
```
**Windows/Linux System:**
```
Press Ctrl + Shift + R
or Ctrl + F5
```
#### Method 2: Developer Tools Clear Cache
1. Press `F12` to open developer tools
2. Right-click the refresh button (next to address bar)
3. Select **"Empty Cache and Hard Reload"**
#### Method 3: Clear Browser Cache
1. **Chrome/Edge:**
- `Command + Shift + Delete` (Mac) or `Ctrl + Shift + Delete` (Windows)
- Check "Cached images and files"
- Time range select "All time"
- Click "Clear data"
2. **After refreshing page, hard refresh again**
- Ensure React Query cache is also cleared
#### Method 4: Restart Frontend Dev Server (Most Thorough)
```bash
# Press Ctrl+C in frontend terminal to stop service
# Then restart
cd FinnewsHunter/frontend
npm run dev
```
---
## 📊 Data Recovery Timeline After Reset
| Time | Event | Expected Result |
|------|-------|----------------|
| 0 min | Execute reset script | Database cleared, Redis cleared |
| 1 min | Celery Beat starts scheduling | 10 crawl tasks triggered |
| 2-5 min | First batch of news saved | Database starts having data |
| 5-10 min | All sources have data | Frontend can see 100+ news |
| 30 min | Data continues growing | 500+ news |
| 1 hour | Stable operation | 1000-2000 news |
**Notes:**
- Need to wait 5-10 minutes after reset to see new data
- **Frontend must hard refresh** (Command+Shift+R / Ctrl+Shift+R) to clear cache
- Don't reset frequently, as doing so affects system stability
**Steps to immediately hard refresh frontend after reset:**
1. Execute reset command
2. **Immediately** press `Command + Shift + R` (Mac) or `Ctrl + Shift + R` (Windows) in browser
3. Wait 5-10 minutes then refresh again to view new data
---
## ⚠️ Crawler Status Check
### Check Which Sources Are Working
```bash
cd FinnewsHunter
# View news count by source
docker exec finnews_postgres psql -U finnews -d finnews_db -c "
SELECT source, COUNT(*) as count
FROM news
WHERE created_at > NOW() - INTERVAL '1 hour'
GROUP BY source
ORDER BY count DESC;
"
# View recent crawl task status
docker exec finnews_postgres psql -U finnews -d finnews_db -c "
SELECT source,
crawled_count,
saved_count,
status,
error_message
FROM crawl_tasks
WHERE created_at > NOW() - INTERVAL '10 minutes'
ORDER BY created_at DESC
LIMIT 20;
"
```
### View Crawl Errors
```bash
cd FinnewsHunter
# View ERROR logs
docker compose -f deploy/docker-compose.dev.yml logs celery-worker | grep ERROR
# View specific source issues
docker compose -f deploy/docker-compose.dev.yml logs celery-worker | grep "jwview"
```
---
## 📚 User Guide
### Auto Crawl Mode (Recommended) ⭐
**System is configured with automatic crawling for 10 news sources:**
1. 🌐 Sina Finance
2. 🐧 Tencent Finance
3. 💰 Financial World
4. 📊 Economic Observer
5. 📈 Caijing.com
6. 📉 21st Century Business Herald
7. 📰 National Business Daily
8. 🎯 Yicai
9. 📧 NetEase Finance
10. 💎 East Money
**How it works:**
- ✅ Celery Beat automatically triggers crawling for all sources every 1 minute
- ✅ Automatic deduplication (URL level)
- ✅ Smart time filtering (keep news within 24 hours)
- ✅ Stock keyword filtering
- ✅ No manual operation needed
**View crawl progress:**
```bash
# View Celery Beat scheduling logs
cd FinnewsHunter
docker compose -f deploy/docker-compose.dev.yml logs -f celery-beat
# View Celery Worker execution logs
docker compose -f deploy/docker-compose.dev.yml logs -f celery-worker
```
---
### Manual Refresh (Get Latest Immediately)
**Method 1: Via Frontend**
1. Visit http://localhost:3000/news
2. Click the "🔄 Refresh Now" button in the top right
3. System will immediately trigger crawling; data updates in about 2 minutes
**Method 2: Via API**
```bash
# Force refresh Sina Finance
curl -X POST "http://localhost:8000/api/v1/news/refresh?source=sina"
# Force refresh all sources (need to call individually)
for source in sina tencent jwview eeo caijing jingji21 nbd yicai 163 eastmoney; do
curl -X POST "http://localhost:8000/api/v1/news/refresh?source=$source"
sleep 1
done
```
---
### View News List
**Method 1: Via Frontend (Recommended)**
- Visit http://localhost:3000
- Homepage: View source statistics and latest news
- News Feed: Filter news by source and sentiment
- Batch selection support: Use checkboxes to select multiple news, supports Shift key range selection
- Batch operations: Select all/deselect all, batch delete, batch analyze
**Method 2: Via API**
```bash
# Get latest news from all sources (200 items)
curl "http://localhost:8000/api/v1/news/latest?limit=200"
# Get news from specific source
curl "http://localhost:8000/api/v1/news/latest?source=sina&limit=50"
# Filter by sentiment (using old API)
curl "http://localhost:8000/api/v1/news/?sentiment=positive&limit=20"
# Get all available news source list
curl "http://localhost:8000/api/v1/news/sources"
```
---
### Batch Operations on News
**Frontend Operations:**
1. **Batch Selection**:
- Click checkbox on the left of news card to select single news
- Hold Shift key and click for range selection
- Use "Select All" button in top toolbar to select all news in current filter results
- Selection state automatically clears when switching news source or filter conditions
2. **Batch Delete**:
- After selecting multiple news, click "Batch Delete" button in top toolbar
- After confirming delete dialog, selected news will be deleted
- List automatically refreshes after deletion
3. **Batch Analysis**:
- After selecting multiple news, click "Batch Analyze" button in top toolbar
- System will analyze selected news sequentially, showing progress and result statistics
- After analysis completes, shows success/failure count
**API Operations:**
```bash
# Batch delete news
curl -X POST "http://localhost:8000/api/v1/news/batch/delete" \
-H "Content-Type: application/json" \
-d '{"news_ids": [1, 2, 3]}'
# Batch analyze news
curl -X POST "http://localhost:8000/api/v1/analysis/batch" \
-H "Content-Type: application/json" \
-d '{"news_ids": [1, 2, 3], "provider": "bailian", "model": "qwen-plus"}'
```
---
### Analyze News
**Method 1: Via Frontend**
- Click "✨ Analyze" button on news card
- Wait 3-5 seconds to view analysis results
- Click news card to open detail drawer, view complete analysis content
**Method 2: Via API**
```bash
# Analyze news with specified ID (using default model)
curl -X POST http://localhost:8000/api/v1/analysis/news/1
# Analyze news (specify model)
curl -X POST http://localhost:8000/api/v1/analysis/news/1 \
-H "Content-Type: application/json" \
-d '{"provider": "bailian", "model": "qwen-max"}'
# View analysis results
curl http://localhost:8000/api/v1/analysis/1
```
---
### Switch LLM Model
**Frontend Operations:**
1. Click model selector in top right (shows current model name)
2. Select different provider and model from dropdown menu
3. Selection automatically saves, subsequent analyses will use new model
**Supported Models:**
- 🔥 **Bailian**: qwen-plus, qwen-max, qwen-turbo, qwen-long
- 🤖 **OpenAI**: gpt-4, gpt-4-turbo, gpt-3.5-turbo
- 🧠 **DeepSeek**: deepseek-chat, deepseek-coder
- 🌙 **Kimi**: moonshot-v1-8k, moonshot-v1-32k, moonshot-v1-128k
- 🔮 **Zhipu**: glm-4, glm-4-plus, glm-4-air
**API to Get Available Model List:**
```bash
curl http://localhost:8000/api/v1/llm/config
```
---
### Search News
**Frontend Operations:**
1. Enter keywords in top search box
2. Supports search: title, content, stock code, source
3. Matching keywords will be highlighted
4. Search has 300ms debounce, automatically searches after input stops
**Search Examples:**
- Search stock code: `600519` (Kweichow Moutai)
- Search keywords: `新能源` (new energy), `半导体` (semiconductor)
- Search source: `sina`, `eastmoney`
---
### View News Details
**Frontend Operations:**
1. Click any news card
2. Detail drawer slides out from right, displaying:
- 📰 News title and source
- 📊 Sentiment score (positive/negative/neutral)
- 📈 Associated stock codes
- 📝 Complete news content
- 🤖 AI analysis results (Markdown format)
- 🔗 Original article link
3. Click "Copy Analysis Content" to copy analysis report in Markdown format
---
### Stock K-Line Analysis
**Frontend Operations:**
1. Visit http://localhost:3000/stocks/SH600519 (Kweichow Moutai example)
2. Use top right search box to enter stock code or name (e.g., `茅台` (Moutai), `600519`)
3. Select time period: Daily K, 60min, 30min, 15min, 5min, 1min
4. Chart supports:
- 📈 K-line candlestick chart (OHLC)
- 📊 Volume bar chart
- 📉 MA moving averages (5/10/30/60 day)
**API Operations:**
```bash
# Get K-line data (daily, default 180 items)
curl "http://localhost:8000/api/v1/stocks/SH600519/kline?period=daily&limit=180"
# Get minute K-line (60-minute line)
curl "http://localhost:8000/api/v1/stocks/SH600519/kline?period=60m&limit=200"
# Search stocks
curl "http://localhost:8000/api/v1/stocks/search/realtime?q=茅台&limit=10"
# View stock count in database
curl "http://localhost:8000/api/v1/stocks/count"
```
---
### Filter by Source
**Frontend Operations:**
1. **Homepage (Dashboard)**
- View "News Source Statistics" card
- Click any source button to filter
- Display news count and list for that source
2. **News Feed Page**
- Top has 10 source filter buttons
- Click to switch and view different sources
- Supports source + sentiment dual filtering
**API Operations:**
```bash
# View Sina Finance news
curl "http://localhost:8000/api/v1/news/latest?source=sina&limit=50"
# View National Business Daily news
curl "http://localhost:8000/api/v1/news/latest?source=nbd&limit=50"
# View all sources
curl "http://localhost:8000/api/v1/news/latest?limit=200"
```
---
## 🏗️ Project Structure
```
FinnewsHunter/
├── backend/ # Backend service
│ ├── app/
│ │ ├── agents/ # Agent definitions (NewsAnalyst, debate agents, etc.)
│ │ ├── api/v1/ # FastAPI routes
│ │ │ ├── analysis.py # Analysis API (supports batch analysis)
│ │ │ ├── llm_config.py # LLM config API
│ │ │ ├── news_v2.py # News API (supports batch delete)
│ │ │ └── ...
│ │ ├── core/ # Core configuration (config, database, redis, neo4j)
│ │ ├── models/ # SQLAlchemy data models
│ │ ├── services/ # Business services
│ │ │ ├── llm_service.py # LLM service (multi-provider support)
│ │ │ ├── analysis_service.py # Analysis service (async vectorization)
│ │ │ ├── embedding_service.py # Vectorization service (based on AgenticX BailianEmbeddingProvider)
│ │ │ └── stock_data_service.py # Stock data service
│ │ ├── storage/ # Storage wrapper
│ │ │ └── vector_storage.py # Milvus vector storage (based on AgenticX MilvusStorage)
│ │ ├── tasks/ # Celery tasks
│ │ └── tools/ # AgenticX tools (Crawler, Cleaner)
│ ├── tests/ # Test and utility scripts
│ │ ├── check_milvus_data.py # Check Milvus vector storage data
│ │ ├── check_news_embedding_status.py # Check news vectorization status
│ │ └── manual_vectorize.py # Manually vectorize specified news
│ ├── env.example # Environment variable template
│ └── requirements.txt # Python dependencies
├── frontend/ # React frontend
│ └── src/
│ ├── components/ # Components
│ │ ├── ModelSelector.tsx # LLM model selector
│ │ ├── NewsDetailDrawer.tsx # News detail drawer
│ │ └── HighlightText.tsx # Keyword highlighting
│ ├── context/ # React Context
│ ├── hooks/ # Custom Hooks
│ │ └── useDebounce.ts # Debounce Hook
│ ├── layout/ # Layout components
│ └── pages/ # Page components
│ └── NewsListPage.tsx # News list page (supports batch operations)
├── deploy/ # Deployment configuration
│ ├── docker-compose.dev.yml # Docker Compose configuration
│ ├── Dockerfile.celery # Celery image build file
│ └── celery-entrypoint.sh # Celery container startup script
├── conclusions/ # Module summary documentation
│ ├── backend/ # Backend module summaries
│ └── frontend/ # Frontend module summaries
└── .dev-docs/ # Development documentation
```
---
## 🧪 Testing & Acceptance
### MVP Acceptance Criteria
- [x] News crawling successful and saved to PostgreSQL
- [x] NewsAnalyst calls LLM to complete analysis
- [x] Analysis results include sentiment scores
- [x] Frontend can display news and analysis results
- [x] Support multi-provider LLM dynamic switching
- [x] News details display complete analysis content
- [x] Real-time search and filtering functionality
- [x] Batch selection, batch delete, batch analysis functionality
- [x] Vectorization and storage services based on AgenticX
- [x] Async vectorization, non-blocking analysis flow
### Testing Process
1. **Start All Services**
```bash
./start.sh
```
2. **Check Docker Container Status**
```bash
docker ps
# Should see: postgres, redis, milvus-standalone, milvus-etcd, milvus-minio
```
3. **Test News Crawling**
```bash
curl -X POST http://localhost:8000/api/v1/news/crawl \
-H "Content-Type: application/json" \
-d '{"source": "sina", "start_page": 1, "end_page": 1}'
# Wait 5-10 seconds then check results
curl http://localhost:8000/api/v1/news/?limit=5
```
4. **Test Agent Analysis**
```bash
# Get first news ID
NEWS_ID=$(curl -s http://localhost:8000/api/v1/news/?limit=1 | jq '.[0].id')
# Trigger analysis
curl -X POST http://localhost:8000/api/v1/analysis/news/$NEWS_ID
# View analysis results
curl http://localhost:8000/api/v1/analysis/1
```
5. **Test Frontend Interface**
- Open `frontend/index.html`
- Click "Crawl News" and wait for completion
- Select a news item and click "Analyze"
- Check if sentiment score is displayed
---
## 🔧 Troubleshooting
### Issue 1: Database Connection Failed
**Symptom:** Backend startup error `could not connect to database`
**Solution:**
```bash
cd FinnewsHunter
# Check if PostgreSQL is running
docker ps | grep postgres
# View logs
docker compose -f deploy/docker-compose.dev.yml logs postgres
# Restart container
docker compose -f deploy/docker-compose.dev.yml restart postgres
# Wait 30 seconds then retry backend startup
```
---
### Issue 2: Celery Tasks Not Executing
**Symptom:** Frontend shows 0 news count, no automatic crawling
**Troubleshooting Steps:**
```bash
cd FinnewsHunter
# 1. Check if Celery Worker is running
docker ps | grep celery
# 2. View Celery Beat logs (should see tasks triggered every minute)
docker compose -f deploy/docker-compose.dev.yml logs celery-beat --tail=100
# 3. View Celery Worker logs (check task execution)
docker compose -f deploy/docker-compose.dev.yml logs celery-worker --tail=100
# 4. Check Redis connection
docker exec finnews_redis redis-cli PING
# Should return PONG
# 5. Restart Celery services
docker compose -f deploy/docker-compose.dev.yml restart celery-worker celery-beat
```
---
### Issue 3: Crawling Failed (404 Error)
**Symptom:** Celery logs show `404 Client Error: Not Found`
**Cause:** News website URL has changed
**Solution:**
```bash
# 1. Manually visit URL to verify if available
curl -I https://finance.caijing.com.cn/
# 2. If URL changed, update corresponding crawler configuration
# Edit backend/app/tools/{source}_crawler.py
# Update BASE_URL and STOCK_URL
# 3. Clear Python cache
cd FinnewsHunter/backend
find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
# 4. Restart Celery
cd ..
docker compose -f deploy/docker-compose.dev.yml restart celery-worker celery-beat
```
---
### Issue 4: Only Sina Finance Has Data
**Symptom:** Other 9 sources have no news
**Possible Causes:**
1. Celery Beat configuration incomplete
2. Crawler code has errors
3. Website URL incorrect
**Solution:**
```bash
cd FinnewsHunter
# 1. Check Celery Beat configuration
docker compose -f deploy/docker-compose.dev.yml logs celery-beat | grep "crawl-"
# Should see 10 scheduled tasks (crawl-sina, crawl-tencent, ..., crawl-eastmoney)
# 2. Manually test single source crawling
docker exec -it finnews_celery_worker python -c "
from app.tools import get_crawler_tool
crawler = get_crawler_tool('nbd') # Test National Business Daily
news = crawler.crawl()
print(f'Crawled {len(news)} news items')
"
# 3. View data volume by source in database
docker exec finnews_postgres psql -U finnews -d finnews_db -c "
SELECT source, COUNT(*) as count
FROM news
GROUP BY source
ORDER BY count DESC;
"
# 4. If a source keeps failing, view detailed errors
docker compose -f deploy/docker-compose.dev.yml logs celery-worker | grep "ERROR"
```
---
### Issue 5: LLM Call Failed
**Symptom:** Analysis functionality not working, error `LLM Provider NOT provided`
**Solution:**
```bash
cd FinnewsHunter/backend
# 1. Check if API Key is configured
grep -E "DASHSCOPE_API_KEY|OPENAI_API_KEY|DEEPSEEK_API_KEY" .env
# 2. Check if Base URL is correct (Bailian must configure)
grep DASHSCOPE_BASE_URL .env
# Should be: https://dashscope.aliyuncs.com/compatible-mode/v1
# 3. Verify LLM config API is normal
curl http://localhost:8000/api/v1/llm/config | jq '.providers[].has_api_key'
# At least one should return true
# 4. If using Bailian, ensure complete configuration
cat >> .env << EOF
DASHSCOPE_API_KEY=sk-your-key
DASHSCOPE_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
BAILIAN_MODELS=qwen-plus,qwen-max
EOF
# 5. Restart backend service
```
---
### Issue 6: Frontend Shows Blank or CORS Error
**Symptom:** Frontend cannot load data, browser Console shows CORS error
**Solution:**
```bash
# 1. Check backend CORS configuration
cd FinnewsHunter/backend
grep BACKEND_CORS_ORIGINS .env
# Should include http://localhost:3000
# 2. Check frontend API address configuration
cd ../frontend
cat .env
# VITE_API_URL should be http://localhost:8000
# 3. Hard refresh browser
# Chrome/Edge: Ctrl+Shift+R (Windows) or Cmd+Shift+R (Mac)
# 4. Restart frontend dev server
npm run dev
```
---
### Issue 7: Milvus Connection Failed
**Symptom:** Vector search functionality not working
**Solution:**
```bash
cd FinnewsHunter
# Milvus requires longer startup time (approximately 60 seconds)
docker compose -f deploy/docker-compose.dev.yml logs milvus-standalone
# Check health status
docker inspect finnews_milvus | grep -A 10 Health
# Restart Milvus related services
docker compose -f deploy/docker-compose.dev.yml restart milvus-etcd milvus-minio milvus-standalone
```
---
### Issue 8: Data Statistics Inaccurate
**Symptom:** The news count shown on the homepage doesn't match the actual count
**Solution:**
```bash
# Use reset script to clear data and start fresh
cd FinnewsHunter
./reset_all_data.sh
```
---
### Common Debugging Commands
```bash
cd FinnewsHunter
# View all container status
docker compose -f deploy/docker-compose.dev.yml ps
# View complete logs for a service
docker compose -f deploy/docker-compose.dev.yml logs celery-worker --tail=500
# Enter container for debugging
docker exec -it finnews_celery_worker bash
# View database connection
docker exec finnews_postgres psql -U finnews -d finnews_db -c "\conninfo"
# View Redis connection
docker exec finnews_redis redis-cli INFO
# Test network connectivity
docker exec finnews_celery_worker ping -c 3 postgres
```
---
## ⚡ Quick Reference (Common Commands)
### Project Directory
```bash
cd FinnewsHunter
```
### One-Click Operations
```bash
# Start all services
docker compose -f deploy/docker-compose.dev.yml up -d
# Stop all services
docker compose -f deploy/docker-compose.dev.yml down
# Restart Celery (after code updates)
docker compose -f deploy/docker-compose.dev.yml restart celery-worker celery-beat
# Clear all data and start fresh
./reset_all_data.sh
```
### View Status
```bash
# Service status
docker compose -f deploy/docker-compose.dev.yml ps
# News count
docker exec finnews_postgres psql -U finnews -d finnews_db -c "SELECT source, COUNT(*) FROM news GROUP BY source;"
# Task count
docker exec finnews_postgres psql -U finnews -d finnews_db -c "SELECT status, COUNT(*) FROM crawl_tasks GROUP BY status;"
# Redis cache
docker exec finnews_redis redis-cli DBSIZE
```
### View Logs
```bash
# Celery Beat (scheduled dispatch)
docker compose -f deploy/docker-compose.dev.yml logs -f celery-beat
# Celery Worker (task execution)
docker compose -f deploy/docker-compose.dev.yml logs -f celery-worker
# PostgreSQL
docker compose -f deploy/docker-compose.dev.yml logs -f postgres
# All services
docker compose -f deploy/docker-compose.dev.yml logs -f
```
### Direct Access
- **Frontend**: http://localhost:3000
- **Backend API**: http://localhost:8000
- **API Documentation**: http://localhost:8000/docs
---
## 📊 Database Structure
### News Table
- id, title, content, url, source
- publish_time, stock_codes
- sentiment_score, is_embedded
### Analysis Table
- id, news_id, agent_name
- sentiment, sentiment_score, confidence
- analysis_result, structured_data
### Stock Table
- id, code, name, industry, market
---
## 🛠️ Development Guide
### Add New Crawler
1. Inherit `BaseCrawler` class
2. Implement `crawl()` method
3. Register in `tools/__init__.py`
Example:
```python
# backend/app/tools/custom_crawler.py
from .crawler_base import BaseCrawler
class CustomCrawlerTool(BaseCrawler):
name = "custom_crawler"
def crawl(self, start_page, end_page):
# Implement crawling logic
pass
```
### Use Enhanced Crawler (Optional)
For scenarios requiring JS rendering or intelligent content extraction, use enhanced crawler:
```python
from app.tools.crawler_enhanced import crawl_url, EnhancedCrawler
# Quick crawl single URL
article = crawl_url("https://finance.sina.com.cn/xxx", engine='auto')
print(article.to_markdown())
# Get LLM message format (multimodal)
llm_messages = article.to_llm_message()
# Batch crawl (with cache)
crawler = EnhancedCrawler(use_cache=True)
articles = crawler.crawl_batch(urls, delay=1.0)
```
**Supported Engines:**
- `requests`: Basic HTTP requests (default)
- `playwright`: JS rendering (requires `playwright install chromium`)
- `jina`: Jina Reader API (requires `JINA_API_KEY` configuration)
- `auto`: Automatically select best engine
**Install Optional Dependencies:**
```bash
pip install markdownify readabilipy playwright
playwright install chromium # Optional, for JS rendering
```
---
### Add New Agent
1. Inherit `Agent` class
2. Define role, goal, backstory
3. Implement business methods
Example:
```python
# backend/app/agents/risk_analyst.py
from agenticx import Agent
class RiskAnalystAgent(Agent):
def __init__(self, llm_provider):
super().__init__(
name="RiskAnalyst",
role="Risk Analyst",
goal="Assess investment risks",
llm_provider=llm_provider
)
```
---
### Using AgenticX Components
FinnewsHunter deeply integrates AgenticX framework core components to avoid reinventing the wheel:
#### 1. Embedding Service
The system uses `agenticx.embeddings.BailianEmbeddingProvider` as the core embedding engine:
```python
from app.services.embedding_service import EmbeddingService
# Synchronous interface (for sync contexts)
embedding_service = EmbeddingService()
vector = embedding_service.embed_text("text content")
# Asynchronous interface (recommended for async contexts)
vector = await embedding_service.aembed_text("text content")
# Batch processing (Provider handles internal batching)
vectors = embedding_service.embed_batch(["text1", "text2", "text3"])
```
**Features**:
- Redis caching support to avoid duplicate calculations
- Automatic text length limit handling (6000 characters)
- Both sync and async interfaces to avoid event loop conflicts
#### 2. Vector Storage (Milvus)
The system uses `agenticx.storage.vectordb_storages.milvus.MilvusStorage` as the vector database:
```python
from app.storage.vector_storage import VectorStorage
vector_storage = VectorStorage()
# Store single vector
vector_storage.store_embedding(
news_id=1,
text="news content",
embedding=[0.1, 0.2, ...]
)
# Batch storage
vector_storage.store_embeddings_batch([
{"news_id": 1, "text": "content1", "embedding": [...]},
{"news_id": 2, "text": "content2", "embedding": [...]}
])
# Similarity search
results = vector_storage.search_similar(query_vector=[...], top_k=10)
# Get statistics (with query count fallback mechanism)
stats = vector_storage.get_stats()
```
**Features**:
- Direct use of AgenticX MilvusStorage, no duplicate implementation
- Compatibility interface for simplified calls
- Query count fallback when `num_entities` is inaccurate
- Async operation support to avoid blocking
#### 3. Async Embedding Best Practices
In async contexts (e.g., FastAPI routes), use async interfaces:
```python
from app.services.embedding_service import EmbeddingService
from app.storage.vector_storage import VectorStorage
async def analyze_news(news_id: int, text: str):
embedding_service = EmbeddingService()
vector_storage = VectorStorage()
# Use async interface to avoid event loop conflicts
embedding = await embedding_service.aembed_text(text)
# Store vector asynchronously in background (non-blocking)
asyncio.create_task(
vector_storage.store_embedding(news_id, text, embedding)
)
# Continue with analysis logic...
```
**Notes**:
- In async contexts, use `aembed_text()` instead of `embed_text()`
- Embedding operations run asynchronously in background, non-blocking
- Milvus `flush()` operation is optimized, not executed by default (relies on auto-flush)
---
## Multi-Agent Debate Architecture
FinnewsHunter's core feature is the **bull-bear debate mechanism**, through collaboration and confrontation of multiple professional agents, deeply mining investment value and risks of individual stocks.
### Core Participants
| Agent | Role | Core Responsibilities |
|-------|------|---------------------|
| **BullResearcher** | Bull Researcher | Mine growth potential, core positives, valuation advantages |
| **BearResearcher** | Bear Researcher | Identify downside risks, negative catalysts, refute optimistic expectations |
| **SearchAnalyst** | Search Analyst | Dynamically acquire data (AkShare/BochaAI/browser search) |
| **InvestmentManager** | Investment Manager | Host debate, evaluate argument quality, make final decisions |
### Debate Data Flow Architecture
```mermaid
graph TD
subgraph Debate Initiation
Manager[Investment Manager] -->|Opening Statement| Orchestrator[Debate Orchestrator]
end
subgraph Multi-Round Debate
Orchestrator -->|Round N| Bull[Bull Researcher]
Bull -->|Statement + Data Request| Orchestrator
Orchestrator -->|Trigger Search| Searcher[Search Analyst]
Searcher -->|Financial Data| AkShare[AkShare]
Searcher -->|Real-time News| BochaAI[BochaAI]
Searcher -->|Web Search| Browser[Browser Engine]
AkShare --> Context[Update Context]
BochaAI --> Context
Browser --> Context
Context --> Orchestrator
Orchestrator -->|Round N| Bear[Bear Researcher]
Bear -->|Statement + Data Request| Orchestrator
end
subgraph Final Decision
Orchestrator -->|Intelligent Data Supplement| Searcher
Orchestrator -->|Comprehensive Judgment| Manager
Manager -->|Investment Rating| Result[Final Report]
end
```
### Dynamic Search Mechanism
During debate, agents can request additional data through specific format:
```
[SEARCH: "Recent gross margin data" source:akshare] -- Get financial data from AkShare
[SEARCH: "Industry competition analysis" source:bochaai] -- Search news from BochaAI
[SEARCH: "Recent fund flows" source:akshare] -- Get fund flows
[SEARCH: "Competitor comparison analysis"] -- Automatically select best data source
```
**Supported Data Sources:**
- **AkShare**: Financial indicators, K-line market data, fund flows, institutional holdings
- **BochaAI**: Real-time news search, analyst reports
- **Browser Search**: Baidu News, Sogou, 360 and other multi-engine search
- **Knowledge Base**: Historical news and analysis data
---
## 📈 Roadmap
### Phase 1: MVP (Completed) ✅
- [x] Project infrastructure
- [x] Database models
- [x] Crawler tool refactoring (10 news sources)
- [x] LLM service integration
- [x] NewsAnalyst agent
- [x] FastAPI routes
- [x] React + TypeScript frontend
### Phase 1.5: Multi-Provider LLM Support (Completed) ✅
- [x] Support 5 major LLM providers (Bailian, OpenAI, DeepSeek, Kimi, Zhipu)
- [x] Frontend dynamic model switching
- [x] LLM config API (`/api/v1/llm/config`)
- [x] News detail drawer (complete content + AI analysis)
- [x] Real-time search functionality (multi-dimensional + keyword highlighting)
- [x] Markdown rendering (supports tables, code blocks)
- [x] One-click copy analysis report
### Phase 1.6: Stock Analysis & Enhanced Crawler (Completed) ✅
- [x] Stock K-line charts (integrated akshare + klinecharts)
- [x] Multi-period support (Daily K/60min/30min/15min/5min/1min)
- [x] Stock search (code/name fuzzy query, pre-loaded 5000+ A-shares)
- [x] Enhanced crawler module
- [x] Multi-engine support (Requests/Playwright/Jina)
- [x] Intelligent content extraction (readabilipy + heuristic algorithms)
- [x] Content quality assessment and auto-retry
- [x] Cache mechanism and unified Article model
### Phase 1.7: AgenticX Deep Integration & Batch Operations (Completed) ✅
- [x] Migrated to AgenticX BailianEmbeddingProvider (removed redundant batch processing logic)
- [x] Migrated to AgenticX MilvusStorage (simplified storage wrapper, removed duplicate code)
- [x] Async vectorization interfaces (aembed_text/aembed_batch), avoid event loop conflicts
- [x] Background async vectorization, non-blocking analysis flow
- [x] Milvus statistics optimization (query count fallback mechanism)
- [x] Frontend batch selection functionality (checkboxes + Shift range selection)
- [x] Batch delete news functionality
- [x] Batch analyze news functionality (with progress display and result statistics)
- [x] Docker Compose optimization (Celery image build, improved startup performance)
### Phase 2: Multi-Agent Debate (Completed) ✅
- [x] BullResearcher & BearResearcher agents
- [x] SearchAnalyst search analyst (dynamic data acquisition)
- [x] InvestmentManager investment manager decision
- [x] Debate orchestrator (DebateOrchestrator)
- [x] Dynamic search mechanism (on-demand data acquisition during debate)
- [x] Three debate modes: parallel analysis, real-time debate, quick analysis
- [ ] Real-time WebSocket push (in progress)
- [ ] Agent execution trace visualization (in progress)
### Phase 3: Knowledge Enhancement (Planned)
- [ ] Financial knowledge graph (Neo4j)
- [ ] Agent memory system
- [ ] GraphRetriever graph retrieval
### Phase 4: Self-Evolution (Planned)
- [ ] ACE framework integration
- [ ] Investment strategy Playbook
- [ ] Decision effectiveness evaluation and learning
---
## 📄 License
This project follows the AgenticX license.
---
## 🙏 Acknowledgments
- [AgenticX](https://github.com/yourusername/AgenticX) - Multi-agent framework
- [FastAPI](https://fastapi.tiangolo.com/) - Web framework
- [Milvus](https://milvus.io/) - Vector database
- [Alibaba Cloud Bailian](https://dashscope.console.aliyun.com/) - LLM service
- [Shadcn UI](https://ui.shadcn.com/) - Frontend component library
---
## ⭐ Star History
If you find this project helpful, please give it a Star ⭐️!
[![Star History Chart](https://api.star-history.com/svg?repos=DemonDamon/FinnewsHunter&type=Date)](https://star-history.com/#DemonDamon/FinnewsHunter&Date)
---
**Built with ❤️ using AgenticX**
================================================
FILE: README_zn.md
================================================
# FinnewsHunter:金融新闻驱动的多智能体投资决策平台
<div align="right">
<a href="README_zn.md">中文版</a> | <a href="README.md">English</a>
</div>
<div align="center">
<img src="assets/images/FINNEWS_HUNTER_LOGO.png" alt="FinnewsHunter Logo" width="450">
</div>
基于 [AgenticX](https://github.com/DemonDamon/AgenticX) 框架构建的企业级金融新闻分析系统,融合实时新闻流、深度量化分析和多智能体辩论机制。
FinnewsHunter 不再局限于传统的文本分类,而是部署多智能体战队(NewsAnalyst, Researcher 等),实时监控新浪财经、每经网、金融界、证券时报等多源财经资讯。利用大模型进行深度解读、情感分析与市场影响评估,并结合知识图谱挖掘潜在的投资机会与风险,为量化交易提供决策级别的阿尔法信号。
---
## 🎯 项目特色
- ✅ **AgenticX 原生**: 深度集成 AgenticX 框架,使用 Agent、Tool、Workflow 等核心抽象
- ✅ **AgenticX 组件集成**: 直接使用 AgenticX 的 `BailianEmbeddingProvider` 和 `MilvusStorage`,避免重复造轮子
- ✅ **智能体驱动**: NewsAnalyst 智能体自动分析新闻情感和市场影响
- ✅ **多厂商 LLM 支持**: 支持百炼、OpenAI、DeepSeek、Kimi、智谱 5 大厂商,前端一键切换
- ✅ **批量操作**: 支持批量选择、批量删除、批量分析新闻,提高操作效率
- ✅ **股票 K 线分析**: 集成 akshare 真实行情数据,支持日K/分K多周期展示
- ✅ **股票智能搜索**: 支持代码和名称模糊查询,预加载 5000+ A股数据
- ✅ **完整技术栈**: FastAPI + PostgreSQL + Milvus + Redis + React
- ✅ **实时搜索**: 支持标题、内容、股票代码多维度搜索,关键词高亮
- ✅ **异步向量化**: 后台异步执行向量化,不阻塞分析流程
- ✅ **生产就绪**: Docker Compose 一键部署,日志、监控完备
---
## 🏗️ 系统架构

系统采用分层架构设计:
- **M6 前端交互层**: React + TypeScript + Shadcn UI
- **M1 平台服务层**: FastAPI Gateway + Task Manager
- **M4/M5 智能体协同层**: AgenticX Agent + Debate Workflow
- **M2/M3 基础设施层**: Crawler Service + LLM Service + Embedding
- **M7-M11 存储与学习层**: PostgreSQL + Milvus + Redis + ACE Framework
---
## 🚀 快速开始
### 前置条件
- Python 3.11+
- Docker & Docker Compose
- (可选) OpenAI API Key 或本地 LLM
- Node.js 18+ (前端开发)
### 1. 安装 AgenticX
```bash
cd /Users/damon/myWork/AgenticX
pip install -e .
```
### 2. 安装后端依赖
```bash
cd FinnewsHunter/backend
pip install -r requirements.txt
```
### 3. 配置环境变量
```bash
cd FinnewsHunter/backend
cp env.example .env
# 编辑 .env 文件,填入 LLM API Key 等配置
```
**多厂商 LLM 配置说明:**
系统支持 5 个 LLM 厂商,至少配置一个即可使用:
| 厂商 | 环境变量 | 获取地址 |
|------|----------|----------|
| 百炼(阿里云) | `DASHSCOPE_API_KEY` | https://dashscope.console.aliyun.com/ |
| OpenAI | `OPENAI_API_KEY` | https://platform.openai.com/api-keys |
| DeepSeek | `DEEPSEEK_API_KEY` | https://platform.deepseek.com/ |
| Kimi(Moonshot) | `MOONSHOT_API_KEY` | https://platform.moonshot.cn/ |
| 智谱 | `ZHIPU_API_KEY` | https://open.bigmodel.cn/ |
**示例配置(推荐百炼):**
```bash
# 百炼(阿里云)- 推荐,国内访问快
DASHSCOPE_API_KEY=sk-your-dashscope-key
DASHSCOPE_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
BAILIAN_MODELS=qwen-plus,qwen-max,qwen-turbo
# 可选:其他厂商
OPENAI_API_KEY=sk-your-openai-key
DEEPSEEK_API_KEY=sk-your-deepseek-key
```
### 4. 启动基础服务(PostgreSQL、Redis、Milvus)
```bash
cd FinnewsHunter
docker compose -f deploy/docker-compose.dev.yml up -d postgres redis milvus-etcd milvus-minio milvus-standalone
```
### 5. 初始化数据库
```bash
cd FinnewsHunter/backend
python init_db.py
```
### 5.1 初始化股票数据(可选,用于股票搜索功能)
```bash
cd FinnewsHunter/backend
python -m app.scripts.init_stocks
# 将从 akshare 获取全部 A 股数据(约 5000+ 只)并存入数据库
```
### 6. 启动后端API服务
```bash
cd FinnewsHunter/backend
uvicorn app.main:app --reload --host 0.0.0.0 --port 8000
```
### 7. 启动Celery Worker和Beat(自动爬取)
```bash
# 新开一个终端
cd FinnewsHunter
docker compose -f deploy/docker-compose.dev.yml up -d celery-worker celery-beat
```
### 8. 启动前端服务
```bash
# 新开一个终端
cd FinnewsHunter/frontend
npm install # 首次需要安装依赖
npm run dev
```
### 9. 访问应用
- **前端界面**: http://localhost:3000
- **后端 API**: http://localhost:8000
- **API 文档**: http://localhost:8000/docs
---
## 🔄 服务管理
### 查看所有服务状态
```bash
cd FinnewsHunter
docker compose -f deploy/docker-compose.dev.yml ps
```
### 重启所有服务
```bash
cd FinnewsHunter
# 重启Docker服务(基础设施 + Celery)
docker compose -f deploy/docker-compose.dev.yml restart
# 如果后端API是独立启动的,需要手动重启
# Ctrl+C 停止后端进程,然后重新运行:
cd backend
uvicorn app.main:app --reload --host 0.0.0.0 --port 8000
```
### 重启特定服务
```bash
cd FinnewsHunter
# 只重启Celery(应用代码更改后)
docker compose -f deploy/docker-compose.dev.yml restart celery-worker celery-beat
# 只重启数据库
docker compose -f deploy/docker-compose.dev.yml restart postgres
# 只重启Redis
docker compose -f deploy/docker-compose.dev.yml restart redis
```
### 停止所有服务
```bash
cd FinnewsHunter
docker compose -f deploy/docker-compose.dev.yml down
```
### 查看日志
```bash
cd FinnewsHunter
# 查看Celery Worker日志
docker compose -f deploy/docker-compose.dev.yml logs -f celery-worker
# 查看Celery Beat日志(定时任务调度)
docker compose -f deploy/docker-compose.dev.yml logs -f celery-beat
# 查看PostgreSQL日志
docker compose -f deploy/docker-compose.dev.yml logs -f postgres
# 查看所有服务日志
docker compose -f deploy/docker-compose.dev.yml logs -f
```
---
## 🗑️ 重置数据库
### 方式1:使用一键重置脚本(推荐)⭐
```bash
cd FinnewsHunter
# 执行重置脚本
./reset_all_data.sh
# 输入 yes 确认
```
**脚本会自动完成:**
1. ✅ 清空PostgreSQL中的所有新闻和任务数据
2. ✅ 清空Redis缓存
3. ✅ 重置数据库自增ID(从1重新开始)
4. ✅ 清空Celery调度文件
5. ✅ 自动重启Celery服务
**执行后等待:**
- 5-10分钟系统会自动重新爬取数据
- 访问前端查看新数据
---
### 方式2:手动重置(高级)
#### 步骤1:清空PostgreSQL数据
```bash
# 进入PostgreSQL容器
docker exec -it finnews_postgres psql -U finnews -d finnews_db
```
在PostgreSQL命令行中执行:
```sql
-- 清空新闻表
DELETE FROM news;
-- 清空任务表
DELETE FROM crawl_tasks;
-- 清空分析表
DELETE FROM analyses;
-- 重置自增ID
ALTER SEQUENCE news_id_seq RESTART WITH 1;
ALTER SEQUENCE crawl_tasks_id_seq RESTART WITH 1;
ALTER SEQUENCE analyses_id_seq RESTART WITH 1;
-- 验证结果(应该都是0)
SELECT 'news表', COUNT(*) FROM news;
SELECT 'crawl_tasks表', COUNT(*) FROM crawl_tasks;
SELECT 'analyses表', COUNT(*) FROM analyses;
-- 退出
\q
```
#### 步骤2:清空Redis缓存
```bash
cd FinnewsHunter
docker exec finnews_redis redis-cli FLUSHDB
```
#### 步骤3:清空Celery调度文件
```bash
cd FinnewsHunter/backend
rm -f celerybeat-schedule*
```
#### 步骤4:重启Celery服务
```bash
cd FinnewsHunter
docker compose -f deploy/docker-compose.dev.yml restart celery-worker celery-beat
```
#### 步骤5:验证数据已清空
```bash
# 检查新闻数量(应该是0)
docker exec finnews_postgres psql -U finnews -d finnews_db -c "SELECT COUNT(*) FROM news;"
# 检查Redis(应该是0或很小)
docker exec finnews_redis redis-cli DBSIZE
# 查看Celery是否开始爬取
docker compose -f deploy/docker-compose.dev.yml logs -f celery-beat
# 应该看到每分钟触发10个爬取任务
```
---
### 方式3:使用Python脚本重置
```bash
cd FinnewsHunter/backend
python reset_database.py
# 输入 yes 确认
```
---
### 方式4:快速手动清理(一行命令)🔥
**适用场景:** 当重置脚本不工作时,使用此方法最快速
```bash
cd FinnewsHunter
# 步骤1:清空数据库表
docker exec finnews_postgres psql -U finnews -d finnews_db -c "DELETE FROM news; DELETE FROM crawl_tasks; DELETE FROM analyses;"
# 步骤2:重置自增ID
docker exec finnews_postgres psql -U finnews -d finnews_db -c "ALTER SEQUENCE news_id_seq RESTART WITH 1; ALTER SEQUENCE crawl_tasks_id_seq RESTART WITH 1; ALTER SEQUENCE analyses_id_seq RESTART WITH 1;"
# 步骤3:清空Redis缓存
docker exec finnews_redis redis-cli FLUSHDB
# 步骤4:清空Celery调度文件
rm -f backend/celerybeat-schedule*
# 步骤5:重启Celery服务
docker compose -f deploy/docker-compose.dev.yml restart celery-worker celery-beat
# 步骤6:验证是否清空(应该显示0)
docker exec finnews_postgres psql -U finnews -d finnews_db -c "SELECT COUNT(*) FROM news;"
```
**执行后立即刷新浏览器:**
- Mac: `Command + Shift + R`
- Windows: `Ctrl + Shift + R`
---
### 🖥️ 清除前端缓存(重要!)
**数据清空后,前端可能仍显示旧数据,这是因为浏览器缓存。**
#### 方法1:硬刷新浏览器(推荐)⭐
**Mac系统:**
```
按 Command + Shift + R
或 Command + Option + R
```
**Windows/Linux系统:**
```
按 Ctrl + Shift + R
或 Ctrl + F5
```
#### 方法2:开发者工具清空缓存
1. 按 `F12` 打开开发者工具
2. 右键点击刷新按钮(地址栏旁边)
3. 选择 **"清空缓存并硬性重新加载"**
#### 方法3:清除浏览器缓存
1. **Chrome/Edge:**
- `Command + Shift + Delete` (Mac) 或 `Ctrl + Shift + Delete` (Windows)
- 勾选"缓存的图片和文件"
- 时间范围选择"全部"
- 点击"清除数据"
2. **刷新页面后,再次硬刷新**
- 确保React Query缓存也被清除
#### 方法4:重启前端开发服务器(最彻底)
```bash
# 在前端终端按 Ctrl+C 停止服务
# 然后重新启动
cd FinnewsHunter/frontend
npm run dev
```
---
## 📊 重置后的数据恢复时间线
| 时间 | 事件 | 预期结果 |
|------|------|----------|
| 0分钟 | 执行重置脚本 | 数据库清空,Redis清空 |
| 1分钟 | Celery Beat开始调度 | 10个爬取任务被触发 |
| 2-5分钟 | 第一批新闻保存 | 数据库开始有数据 |
| 5-10分钟 | 所有源都有数据 | 前端可看到100+条新闻 |
| 30分钟 | 数据持续增长 | 500+条新闻 |
| 1小时 | 稳定运行 | 1000-2000条新闻 |
**注意:**
- 重置后需要等待5-10分钟才能看到新数据
- **前端必须硬刷新**(Command+Shift+R / Ctrl+Shift+R)清除缓存
- 不要频繁重置,会影响系统稳定性
**重置后立即硬刷新前端的步骤:**
1. 执行重置命令
2. **立即**在浏览器按 `Command + Shift + R` (Mac) 或 `Ctrl + Shift + R` (Windows)
3. 等待5-10分钟后再次刷新查看新数据
---
## ⚠️ 爬虫状态检查
### 查看哪些源正常工作
```bash
cd FinnewsHunter
# 查看各源的新闻数量
docker exec finnews_postgres psql -U finnews -d finnews_db -c "
SELECT source, COUNT(*) as count
FROM news
WHERE created_at > NOW() - INTERVAL '1 hour'
GROUP BY source
ORDER BY count DESC;
"
# 查看最近的爬取任务状态
docker exec finnews_postgres psql -U finnews -d finnews_db -c "
SELECT source,
crawled_count,
saved_count,
status,
error_message
FROM crawl_tasks
WHERE created_at > NOW() - INTERVAL '10 minutes'
ORDER BY created_at DESC
LIMIT 20;
"
```
### 查看爬取错误
```bash
cd FinnewsHunter
# 查看ERROR日志
docker compose -f deploy/docker-compose.dev.yml logs celery-worker | grep ERROR
# 查看特定源的问题
docker compose -f deploy/docker-compose.dev.yml logs celery-worker | grep "jwview"
```
---
## 📚 使用指南
### 自动爬取模式(推荐)⭐
**系统已配置10个新闻源的自动爬取:**
1. 🌐 新浪财经
2. 🐧 腾讯财经
3. 💰 金融界
4. 📊 经济观察网
5. 📈 财经网
6. 📉 21经济网
7. 📰 每日经济新闻
8. 🎯 第一财经
9. 📧 网易财经
10. 💎 东方财富
**工作方式:**
- ✅ Celery Beat 每1分钟自动触发所有源的爬取
- ✅ 自动去重(URL级别)
- ✅ 智能时间筛选(保留24小时内新闻)
- ✅ 股票关键词筛选
- ✅ 无需手动操作
**查看爬取进度:**
```bash
# 查看Celery Beat调度日志
cd FinnewsHunter
docker compose -f deploy/docker-compose.dev.yml logs -f celery-beat
# 查看Celery Worker执行日志
docker compose -f deploy/docker-compose.dev.yml logs -f celery-worker
```
---
### 手动刷新(立即获取最新)
**方式 1: 通过前端**
1. 访问 http://localhost:3000/news
2. 点击右上角"🔄 立即刷新"按钮
3. 系统会立即触发爬取,约2分钟后数据更新
**方式 2: 通过 API**
```bash
# 强制刷新新浪财经
curl -X POST "http://localhost:8000/api/v1/news/refresh?source=sina"
# 强制刷新所有源(需要逐个调用)
for source in sina tencent jwview eeo caijing jingji21 nbd yicai 163 eastmoney; do
curl -X POST "http://localhost:8000/api/v1/news/refresh?source=$source"
sleep 1
done
```
---
### 查看新闻列表
**方式 1: 通过前端(推荐)**
- 访问 http://localhost:3000
- 首页:查看来源统计和最新新闻
- 新闻流:按来源和情感筛选新闻
- 支持批量选择:使用复选框选择多条新闻,支持 Shift 键范围选择
- 批量操作:全选/取消全选、批量删除、批量分析
**方式 2: 通过 API**
```bash
# 获取所有来源的最新新闻(200条)
curl "http://localhost:8000/api/v1/news/latest?limit=200"
# 获取特定来源的新闻
curl "http://localhost:8000/api/v1/news/latest?source=sina&limit=50"
# 按情感筛选(使用旧接口)
curl "http://localhost:8000/api/v1/news/?sentiment=positive&limit=20"
# 获取所有可用的新闻源列表
curl "http://localhost:8000/api/v1/news/sources"
```
---
### 批量操作新闻
**前端操作:**
1. **批量选择**:
- 点击新闻卡片左侧的复选框选择单条新闻
- 按住 Shift 键点击可进行范围选择
- 使用顶部工具栏的"全选"按钮选择当前筛选结果的所有新闻
- 切换新闻源或筛选条件时,选择状态会自动清空
2. **批量删除**:
- 选择多条新闻后,点击顶部工具栏的"批量删除"按钮
- 确认删除对话框后,选中的新闻将被删除
- 删除后会自动刷新列表
3. **批量分析**:
- 选择多条新闻后,点击顶部工具栏的"批量分析"按钮
- 系统会依次分析选中的新闻,显示进度和结果统计
- 分析完成后会显示成功/失败数量
**API 操作:**
```bash
# 批量删除新闻
curl -X POST "http://localhost:8000/api/v1/news/batch/delete" \
-H "Content-Type: application/json" \
-d '{"news_ids": [1, 2, 3]}'
# 批量分析新闻
curl -X POST "http://localhost:8000/api/v1/analysis/batch" \
-H "Content-Type: application/json" \
-d '{"news_ids": [1, 2, 3], "provider": "bailian", "model": "qwen-plus"}'
```
---
### 分析新闻
**方式 1: 通过前端**
- 在新闻卡片上点击"✨ 分析"按钮
- 等待3-5秒查看分析结果
- 点击新闻卡片打开详情抽屉,查看完整分析内容
**方式 2: 通过 API**
```bash
# 分析指定ID的新闻(使用默认模型)
curl -X POST http://localhost:8000/api/v1/analysis/news/1
# 分析新闻(指定模型)
curl -X POST http://localhost:8000/api/v1/analysis/news/1 \
-H "Content-Type: application/json" \
-d '{"provider": "bailian", "model": "qwen-max"}'
# 查看分析结果
curl http://localhost:8000/api/v1/analysis/1
```
---
### 切换 LLM 模型
**前端操作:**
1. 点击右上角的模型选择器(显示当前模型名称)
2. 在下拉菜单中选择不同的厂商和模型
3. 选择后自动保存,后续分析将使用新模型
**支持的模型:**
- 🔥 **百炼**: qwen-plus, qwen-max, qwen-turbo, qwen-long
- 🤖 **OpenAI**: gpt-4, gpt-4-turbo, gpt-3.5-turbo
- 🧠 **DeepSeek**: deepseek-chat, deepseek-coder
- 🌙 **Kimi**: moonshot-v1-8k, moonshot-v1-32k, moonshot-v1-128k
- 🔮 **智谱**: glm-4, glm-4-plus, glm-4-air
**API 获取可用模型列表:**
```bash
curl http://localhost:8000/api/v1/llm/config
```
---
### 搜索新闻
**前端操作:**
1. 在顶部搜索框输入关键词
2. 支持搜索:标题、内容、股票代码、来源
3. 匹配的关键词会高亮显示
4. 搜索带有 300ms 防抖,输入停止后自动搜索
**搜索示例:**
- 搜索股票代码:`600519`(贵州茅台)
- 搜索关键词:`新能源`、`半导体`
- 搜索来源:`sina`、`eastmoney`
---
### 查看新闻详情
**前端操作:**
1. 点击任意新闻卡片
2. 右侧滑出详情抽屉,展示:
- 📰 新闻标题和来源
- 📊 情感评分(利好/利空/中性)
- 📈 关联股票代码
- 📝 完整新闻内容
- 🤖 AI 分析结果(Markdown 格式)
- 🔗 原文链接
3. 点击"复制分析内容"可复制 Markdown 格式的分析报告
---
### 股票 K 线分析
**前端操作:**
1. 访问 http://localhost:3000/stocks/SH600519(贵州茅台示例)
2. 使用右上角搜索框输入股票代码或名称(如 `茅台`、`600519`)
3. 选择时间周期:日K、60分、30分、15分、5分、1分
4. 图表支持:
- 📈 K 线蜡烛图(OHLC)
- 📊 成交量柱状图
- 📉 MA 均线(5/10/30/60日)
**API 操作:**
```bash
# 获取 K 线数据(日线,默认180条)
curl "http://localhost:8000/api/v1/stocks/SH600519/kline?period=daily&limit=180"
# 获取分钟 K 线(60分钟线)
curl "http://localhost:8000/api/v1/stocks/SH600519/kline?period=60m&limit=200"
# 搜索股票
curl "http://localhost:8000/api/v1/stocks/search/realtime?q=茅台&limit=10"
# 查看数据库中的股票数量
curl "http://localhost:8000/api/v1/stocks/count"
```
---
### 按来源筛选查看
**前端操作:**
1. **首页(Dashboard)**
- 查看"新闻来源统计"卡片
- 点击任意来源按钮筛选
- 显示该来源的新闻数量和列表
2. **新闻流页面**
- 顶部有10个来源筛选按钮
- 点击切换查看不同来源
- 支持来源+情感双重筛选
**API操作:**
```bash
# 查看新浪财经的新闻
curl "http://localhost:8000/api/v1/news/latest?source=sina&limit=50"
# 查看每日经济新闻
curl "http://localhost:8000/api/v1/news/latest?source=nbd&limit=50"
# 查看所有来源
curl "http://localhost:8000/api/v1/news/latest?limit=200"
```
---
## 🏗️ 项目结构
```
FinnewsHunter/
├── backend/ # 后端服务
│ ├── app/
│ │ ├── agents/ # 智能体定义(NewsAnalyst、辩论智能体等)
│ │ ├── api/v1/ # FastAPI 路由
│ │ │ ├── analysis.py # 分析 API(支持批量分析)
│ │ │ ├── llm_config.py # LLM 配置 API
│ │ │ ├── news_v2.py # 新闻 API(支持批量删除)
│ │ │ └── ...
│ │ ├── core/ # 核心配置(config, database, redis, neo4j)
│ │ ├── models/ # SQLAlchemy 数据模型
│ │ ├── services/ # 业务服务
│ │ │ ├── llm_service.py # LLM 服务(支持多厂商)
│ │ │ ├── analysis_service.py # 分析服务(异步向量化)
│ │ │ ├── embedding_service.py # 向量化服务(基于 AgenticX BailianEmbeddingProvider)
│ │ │ └── stock_data_service.py # 股票数据服务
│ │ ├── storage/ # 存储封装
│ │ │ └── vector_storage.py # Milvus 向量存储(基于 AgenticX MilvusStorage)
│ │ ├── tasks/ # Celery 任务
│ │ └── tools/ # AgenticX 工具(Crawler, Cleaner)
│ ├── tests/ # 测试和工具脚本
│ │ ├── check_milvus_data.py # 检查 Milvus 向量存储数据
│ │ ├── check_news_embedding_status.py # 检查新闻向量化状态
│ │ └── manual_vectorize.py # 手动向量化指定新闻
│ ├── env.example # 环境变量模板
│ └── requirements.txt # Python 依赖
├── frontend/ # React 前端
│ └── src/
│ ├── components/ # 组件
│ │ ├── ModelSelector.tsx # LLM 模型选择器
│ │ ├── NewsDetailDrawer.tsx # 新闻详情抽屉
│ │ └── HighlightText.tsx # 关键词高亮
│ ├── context/ # React Context
│ ├── hooks/ # 自定义 Hooks
│ │ └── useDebounce.ts # 防抖 Hook
│ ├── layout/ # 布局组件
│ └── pages/ # 页面组件
│ └── NewsListPage.tsx # 新闻列表页面(支持批量操作)
├── deploy/ # 部署配置
│ ├── docker-compose.dev.yml # Docker Compose 配置
│ ├── Dockerfile.celery # Celery 镜像构建文件
│ └── celery-entrypoint.sh # Celery 容器启动脚本
├── conclusions/ # 模块摘要文档
│ ├── backend/ # 后端模块总结
│ └── frontend/ # 前端模块总结
└── .dev-docs/ # 开发文档
```
---
## 🧪 测试与验收
### MVP 验收标准
- [x] 新闻爬取成功并存入 PostgreSQL
- [x] NewsAnalyst 调用 LLM 完成分析
- [x] 分析结果包含情感评分
- [x] 前端能够展示新闻和分析结果
- [x] 支持多厂商 LLM 动态切换
- [x] 新闻详情展示完整分析内容
- [x] 实时搜索和筛选功能
- [x] 批量选择、批量删除、批量分析功能
- [x] 基于 AgenticX 的向量化和存储服务
- [x] 异步向量化,不阻塞分析流程
### 测试流程
1. **启动所有服务**
```bash
./start.sh
```
2. **检查 Docker 容器状态**
```bash
docker ps
# 应看到: postgres, redis, milvus-standalone, milvus-etcd, milvus-minio
```
3. **测试新闻爬取**
```bash
curl -X POST http://localhost:8000/api/v1/news/crawl \
-H "Content-Type: application/json" \
-d '{"source": "sina", "start_page": 1, "end_page": 1}'
# 等待 5-10 秒后查看结果
curl http://localhost:8000/api/v1/news/?limit=5
```
4. **测试智能体分析**
```bash
# 获取第一条新闻的ID
NEWS_ID=$(curl -s http://localhost:8000/api/v1/news/?limit=1 | jq '.[0].id')
# 触发分析
curl -X POST http://localhost:8000/api/v1/analysis/news/$NEWS_ID
# 查看分析结果
curl http://localhost:8000/api/v1/analysis/1
```
5. **测试前端界面**
- 访问 http://localhost:3000(确保前端开发服务器已通过 `npm run dev` 启动)
- 点击"爬取新闻"并等待完成
- 选择一条新闻点击"分析"
- 查看情感评分是否显示
---
## 🔧 故障排查
### 问题 1: 数据库连接失败
**症状:** 后端启动报错 `could not connect to database`
**解决方法:**
```bash
cd FinnewsHunter
# 检查 PostgreSQL 是否启动
docker ps | grep postgres
# 查看日志
docker compose -f deploy/docker-compose.dev.yml logs postgres
# 重启容器
docker compose -f deploy/docker-compose.dev.yml restart postgres
# 等待30秒后重试后端启动
```
---
### 问题 2: Celery任务不执行
**症状:** 前端显示新闻数量为0,没有自动爬取
**排查步骤:**
```bash
cd FinnewsHunter
# 1. 检查Celery Worker是否运行
docker ps | grep celery
# 2. 查看Celery Beat日志(应该看到每分钟触发任务)
docker compose -f deploy/docker-compose.dev.yml logs celery-beat --tail=100
# 3. 查看Celery Worker日志(查看任务执行情况)
docker compose -f deploy/docker-compose.dev.yml logs celery-worker --tail=100
# 4. 检查Redis连接
docker exec finnews_redis redis-cli PING
# 应该返回 PONG
# 5. 重启Celery服务
docker compose -f deploy/docker-compose.dev.yml restart celery-worker celery-beat
```
---
### 问题 3: 爬取失败(404错误)
**症状:** Celery日志显示 `404 Client Error: Not Found`
**原因:** 新闻网站URL已变更
**解决方法:**
```bash
# 1. 手动访问URL验证是否可用
curl -I https://finance.caijing.com.cn/
# 2. 如果URL变更,更新对应爬虫的配置
# 编辑 backend/app/tools/{source}_crawler.py
# 更新 BASE_URL 和 STOCK_URL
# 3. 清理Python缓存
cd FinnewsHunter/backend
find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
# 4. 重启Celery
cd ..
docker compose -f deploy/docker-compose.dev.yml restart celery-worker celery-beat
```
---
### 问题 4: 只有新浪财经有数据
**症状:** 其他9个来源没有新闻
**可能原因:**
1. Celery Beat配置不完整
2. 爬虫代码有错误
3. 网站URL不正确
**解决方法:**
```bash
cd FinnewsHunter
# 1. 检查Celery Beat配置
docker compose -f deploy/docker-compose.dev.yml logs celery-beat | grep "crawl-"
# 应该看到10个定时任务(crawl-sina, crawl-tencent, ..., crawl-eastmoney)
# 2. 手动测试单个源的爬取
docker exec -it finnews_celery_worker python -c "
from app.tools import get_crawler_tool
crawler = get_crawler_tool('nbd') # 测试每日经济新闻
news = crawler.crawl()
print(f'爬取到 {len(news)} 条新闻')
"
# 3. 查看数据库中各源的数据量
docker exec finnews_postgres psql -U finnews -d finnews_db -c "
SELECT source, COUNT(*) as count
FROM news
GROUP BY source
ORDER BY count DESC;
"
# 4. 如果某个源一直失败,查看详细错误
docker compose -f deploy/docker-compose.dev.yml logs celery-worker | grep "ERROR"
```
---
### 问题 5: LLM 调用失败
**症状:** 分析功能不工作,报错 `LLM Provider NOT provided`
**解决方法:**
```bash
cd FinnewsHunter/backend
# 1. 检查 API Key 是否配置
grep -E "DASHSCOPE_API_KEY|OPENAI_API_KEY|DEEPSEEK_API_KEY" .env
# 2. 检查 Base URL 是否正确(百炼必须配置)
grep DASHSCOPE_BASE_URL .env
# 应该是: https://dashscope.aliyuncs.com/compatible-mode/v1
# 3. 验证 LLM 配置 API 是否正常
curl http://localhost:8000/api/v1/llm/config | jq '.providers[].has_api_key'
# 至少有一个返回 true
# 4. 如果使用百炼,确保配置完整
cat >> .env << EOF
DASHSCOPE_API_KEY=sk-your-key
DASHSCOPE_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
BAILIAN_MODELS=qwen-plus,qwen-max
EOF
# 5. 重启后端服务
```
---
### 问题 6: 前端显示空白或CORS错误
**症状:** 前端无法加载数据,浏览器Console显示CORS错误
**解决方法:**
```bash
# 1. 检查后端CORS配置
cd FinnewsHunter/backend
grep BACKEND_CORS_ORIGINS .env
# 应该包含 http://localhost:3000
# 2. 检查前端API地址配置
cd ../frontend
cat .env
# VITE_API_URL 应该是 http://localhost:8000
# 3. 硬刷新浏览器
# Chrome/Edge: Ctrl+Shift+R (Windows) 或 Cmd+Shift+R (Mac)
# 4. 重启前端开发服务器
npm run dev
```
---
### 问题 7: Milvus 连接失败
**症状:** 向量搜索功能不工作
**解决方法:**
```bash
cd FinnewsHunter
# Milvus 需要较长启动时间(约 60 秒)
docker compose -f deploy/docker-compose.dev.yml logs milvus-standalone
# 检查健康状态
docker inspect finnews_milvus | grep -A 10 Health
# 重启Milvus相关服务
docker compose -f deploy/docker-compose.dev.yml restart milvus-etcd milvus-minio milvus-standalone
```
---
### 问题 8: 数据统计不准确
**症状:** 首页显示的新闻数和实际不符
**解决方法:**
```bash
# 使用重置脚本清空数据重新开始
cd FinnewsHunter
./reset_all_data.sh
```
---
### 常用调试命令
```bash
cd FinnewsHunter
# 查看所有容器状态
docker compose -f deploy/docker-compose.dev.yml ps
# 查看某个服务的完整日志
docker compose -f deploy/docker-compose.dev.yml logs celery-worker --tail=500
# 进入容器调试
docker exec -it finnews_celery_worker bash
# 查看数据库连接
docker exec finnews_postgres psql -U finnews -d finnews_db -c "\conninfo"
# 查看Redis连接
docker exec finnews_redis redis-cli INFO
# 测试网络连通性
docker exec finnews_celery_worker ping -c 3 postgres
```
---
## ⚡ 快速参考(常用命令)
### 项目目录
```bash
cd FinnewsHunter
```
### 一键操作
```bash
# 启动所有服务
docker compose -f deploy/docker-compose.dev.yml up -d
# 停止所有服务
docker compose -f deploy/docker-compose.dev.yml down
# 重启Celery(代码更新后)
docker compose -f deploy/docker-compose.dev.yml restart celery-worker celery-beat
# 清空所有数据重新开始
./reset_all_data.sh
```
### 查看状态
```bash
# 服务状态
docker compose -f deploy/docker-compose.dev.yml ps
# 新闻数量
docker exec finnews_postgres psql -U finnews -d finnews_db -c "SELECT source, COUNT(*) FROM news GROUP BY source;"
# 任务数量
docker exec finnews_postgres psql -U finnews -d finnews_db -c "SELECT status, COUNT(*) FROM crawl_tasks GROUP BY status;"
# Redis缓存
docker exec finnews_redis redis-cli DBSIZE
```
### 查看日志
```bash
# Celery Beat(定时调度)
docker compose -f deploy/docker-compose.dev.yml logs -f celery-beat
# Celery Worker(任务执行)
docker compose -f deploy/docker-compose.dev.yml logs -f celery-worker
# PostgreSQL
docker compose -f deploy/docker-compose.dev.yml logs -f postgres
# 所有服务
docker compose -f deploy/docker-compose.dev.yml logs -f
```
### 直接访问
- **前端**: http://localhost:3000
- **后端API**: http://localhost:8000
- **API文档**: http://localhost:8000/docs
---
## 📊 数据库结构
### News(新闻表)
- id, title, content, url, source
- publish_time, stock_codes
- sentiment_score, is_embedded
### Analysis(分析表)
- id, news_id, agent_name
- sentiment, sentiment_score, confidence
- analysis_result, structured_data
### Stock(股票表)
- id, code, name, industry, market
---
## 🛠️ 开发指南
### 添加新的爬虫
1. 继承 `BaseCrawler` 类
2. 实现 `crawl()` 方法
3. 注册到 `tools/__init__.py`
示例:
```python
# backend/app/tools/custom_crawler.py
from .crawler_base import BaseCrawler
class CustomCrawlerTool(BaseCrawler):
name = "custom_crawler"
def crawl(self, start_page, end_page):
# 实现爬取逻辑
pass
```
### 使用增强版爬虫(可选)
对于需要 JS 渲染或智能内容提取的场景,可使用增强版爬虫:
```python
from app.tools.crawler_enhanced import crawl_url, EnhancedCrawler
# 快速爬取单个 URL
article = crawl_url("https://finance.sina.com.cn/xxx", engine='auto')
print(article.to_markdown())
# 获取 LLM 消息格式(多模态)
llm_messages = article.to_llm_message()
# 批量爬取(带缓存)
crawler = EnhancedCrawler(use_cache=True)
articles = crawler.crawl_batch(urls, delay=1.0)
```
**支持的引擎:**
- `requests`: 基础 HTTP 请求(默认)
- `playwright`: JS 渲染(需安装 `playwright install chromium`)
- `jina`: Jina Reader API(需配置 `JINA_API_KEY`)
- `auto`: 自动选择最佳引擎
**安装可选依赖:**
```bash
pip install markdownify readabilipy playwright
playwright install chromium # 可选,用于 JS 渲染
```
---
### 添加新的智能体
1. 继承 `Agent` 类
2. 定义 role、goal、backstory
3. 实现业务方法
示例:
```python
# backend/app/agents/risk_analyst.py
from agenticx import Agent
class RiskAnalystAgent(Agent):
def __init__(self, llm_provider):
super().__init__(
name="RiskAnalyst",
role="风险分析师",
goal="评估投资风险",
llm_provider=llm_provider
)
```
---
### 使用 AgenticX 组件
FinnewsHunter 深度集成了 AgenticX 框架的核心组件,避免重复造轮子:
#### 1. 向量化服务(Embedding)
系统使用 `agenticx.embeddings.BailianEmbeddingProvider` 作为核心向量化引擎:
```python
from app.services.embedding_service import EmbeddingService
# 同步接口(适用于同步上下文)
embedding_service = EmbeddingService()
vector = embedding_service.embed_text("文本内容")
# 异步接口(推荐在异步上下文中使用)
vector = await embedding_service.aembed_text("文本内容")
# 批量处理(Provider 内部已实现批量优化)
vectors = embedding_service.embed_batch(["文本1", "文本2", "文本3"])
```
**特点**:
- 支持 Redis 缓存,避免重复计算
- 自动处理文本长度限制(6000字符)
- 支持同步和异步两种接口,避免事件循环冲突
#### 2. 向量存储(Milvus)
系统使用 `agenticx.storage.vectordb_storages.milvus.MilvusStorage` 作为向量数据库:
```python
from app.storage.vector_storage import VectorStorage
vector_storage = VectorStorage()
# 存储单个向量
vector_storage.store_embedding(
news_id=1,
text="新闻内容",
embedding=[0.1, 0.2, ...]
)
# 批量存储
vector_storage.store_embeddings_batch([
{"news_id": 1, "text": "内容1", "embedding": [...]},
{"news_id": 2, "text": "内容2", "embedding": [...]}
])
# 相似度搜索
results = vector_storage.search_similar(query_vector=[...], top_k=10)
# 获取统计信息(带查询计数回退机制)
stats = vector_storage.get_stats()
```
**特点**:
- 直接使用 AgenticX MilvusStorage,无需重复实现
- 提供兼容性接口,简化调用
- 当 `num_entities` 不准确时,通过实际查询获取真实数量
- 支持异步操作,避免阻塞
#### 3. 异步向量化最佳实践
在异步上下文中(如 FastAPI 路由),推荐使用异步接口:
```python
from app.services.embedding_service import EmbeddingService
from app.storage.vector_storage import VectorStorage
async def analyze_news(news_id: int, text: str):
embedding_service = EmbeddingService()
vector_storage = VectorStorage()
# 使用异步接口,避免事件循环冲突
embedding = await embedding_service.aembed_text(text)
# 后台异步存储向量(不阻塞分析流程)
asyncio.create_task(
vector_storage.store_embedding(news_id, text, embedding)
)
# 继续执行分析逻辑...
```
**注意事项**:
- 在异步上下文中,使用 `aembed_text()` 而不是 `embed_text()`
- 向量化操作在后台异步执行,不阻塞主流程
- Milvus 的 `flush()` 操作已优化,默认不执行(依赖自动刷新)
---
## 多智能体辩论架构
FinnewsHunter 的核心特色是 **多空辩论机制**,通过多个专业智能体的协作与对抗,深度挖掘个股的投资价值和风险。
### 核心参与角色
| 智能体 | 角色定位 | 核心职责 |
|--------|----------|----------|
| **BullResearcher** | 看多研究员 | 挖掘增长潜力、核心利好、估值优势 |
| **BearResearcher** | 看空研究员 | 识别下行风险、负面催化剂、反驳乐观预期 |
| **SearchAnalyst** | 搜索分析师 | 动态获取数据(AkShare/BochaAI/浏览器搜索) |
| **InvestmentManager** | 投资经理 | 主持辩论、评估论点质量、做出最终决策 |
### 辩论数据流架构
```mermaid
graph TD
subgraph 辩论启动
Manager[投资经理] -->|开场陈述| Orchestrator[辩论编排器]
end
subgraph 多轮辩论
Orchestrator -->|第N轮| Bull[看多研究员]
Bull -->|发言 + 数据请求| Orchestrator
Orchestrator -->|触发搜索| Searcher[搜索分析师]
Searcher -->|财务数据| AkShare[AkShare]
Searcher -->|实时新闻| BochaAI[BochaAI]
Searcher -->|网页搜索| Browser[浏览器引擎]
AkShare --> Context[更新上下文]
BochaAI --> Context
Browser --> Context
Context --> Orchestrator
Orchestrator -->|第N轮| Bear[看空研究员]
Bear -->|发言 + 数据请求| Orchestrator
end
subgraph 最终决策
Orchestrator -->|智能数据补充| Searcher
Orchestrator -->|综合判断| Manager
Manager -->|投资评级| Result[最终报告]
end
```
### 动态搜索机制
辩论过程中,智能体可以通过特定格式请求额外数据:
```
[SEARCH: "最近的毛利率数据" source:akshare] -- 从 AkShare 获取财务数据
[SEARCH: "行业竞争格局分析" source:bochaai] -- 从 BochaAI 搜索新闻
[SEARCH: "近期资金流向" source:akshare] -- 获取资金流向
[SEARCH: "竞品对比分析"] -- 自动选择最佳数据源
```
**支持的数据源:**
- **AkShare**: 财务指标、K线行情、资金流向、机构持仓
- **BochaAI**: 实时新闻搜索、分析师报告
- **浏览器搜索**: 百度资讯、搜狗、360等多引擎搜索
- **知识库**: 历史新闻和分析数据
---
## 📈 路线图
### Phase 1: MVP(已完成) ✅
- [x] 项目基础设施
- [x] 数据库模型
- [x] 爬虫工具重构(10个新闻源)
- [x] LLM 服务集成
- [x] NewsAnalyst 智能体
- [x] FastAPI 路由
- [x] React + TypeScript 前端
### Phase 1.5: 多厂商 LLM 支持(已完成) ✅
- [x] 支持 5 大 LLM 厂商(百炼、OpenAI、DeepSeek、Kimi、智谱)
- [x] 前端动态模型切换
- [x] LLM 配置 API(`/api/v1/llm/config`)
- [x] 新闻详情抽屉(完整内容 + AI 分析)
- [x] 实时搜索功能(多维度 + 关键词高亮)
- [x] Markdown 渲染(支持表格、代码块)
- [x] 一键复制分析报告
### Phase 1.6: 股票分析与增强爬虫(已完成) ✅
- [x] 股票 K 线图(集成 akshare + klinecharts)
- [x] 多周期支持(日K/60分/30分/15分/5分/1分)
- [x] 股票搜索(代码/名称模糊查询,预加载 5000+ A股)
- [x] 增强版爬虫模块
- [x] 多引擎支持(Requests/Playwright/Jina)
- [x] 智能内容提取(readabilipy + 启发式算法)
- [x] 内容质量评估与自动重试
- [x] 缓存机制和统一 Article 模型
### Phase 1.7: AgenticX 深度集成与批量操作(已完成) ✅
- [x] 迁移到 AgenticX BailianEmbeddingProvider(移除冗余批量处理逻辑)
- [x] 迁移到 AgenticX MilvusStorage(简化存储封装,移除重复代码)
- [x] 异步向量化接口(aembed_text/aembed_batch),避免事件循环冲突
- [x] 后台异步向量化,不阻塞分析流程
- [x] Milvus 统计信息优化(查询计数回退机制)
- [x] 前端批量选择功能(复选框 + Shift 范围选择)
- [x] 批量删除新闻功能
- [x] 批量分析新闻功能(带进度显示和结果统计)
- [x] Docker Compose 优化(Celery 镜像构建,提升启动性能)
### Phase 2: 多智能体辩论(已完成) ✅
- [x] BullResearcher & BearResearcher 智能体
- [x] SearchAnalyst 搜索分析师(动态数据获取)
- [x] InvestmentManager 投资经理决策
- [x] 辩论编排器(DebateOrchestrator)
- [x] 动态搜索机制(辩论中按需获取数据)
- [x] 三种辩论模式:并行分析、实时辩论、快速分析
- [ ] 实时 WebSocket 推送(进行中)
- [ ] 智能体执行轨迹可视化(进行中)
### Phase 3: 知识增强(计划中)
- [ ] 金融知识图谱(Neo4j)
- [ ] 智能体记忆系统
- [ ] GraphRetriever 图检索
### Phase 4: 自我进化(计划中)
- [ ] ACE 框架集成
- [ ] 投资策略 Playbook
- [ ] 决策效果评估与学习
---
## 📄 许可证
本项目遵循 AgenticX 的许可证。
---
## 🙏 致谢
- [AgenticX](https://github.com/DemonDamon/AgenticX) - 多智能体框架
- [FastAPI](https://fastapi.tiangolo.com/) - Web 框架
- [Milvus](https://milvus.io/) - 向量数据库
- [阿里云百炼](https://dashscope.console.aliyun.com/) - LLM 服务
- [Shadcn UI](https://ui.shadcn.com/) - 前端组件库
---
## ⭐ Star History
如果你觉得这个项目对你有帮助,欢迎给个 Star ⭐️!
[](https://star-history.com/#DemonDamon/FinnewsHunter&Date)
---
**Built with ❤️ using AgenticX**
================================================
FILE: backend/.gitignore
================================================
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
env/
venv/
ENV/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# Environment variables
.env
.env.local
# IDE
.vscode/
.idea/
*.swp
*.swo
*~
# Logs
logs/
*.log
# Database
*.db
*.sqlite
# OS
.DS_Store
Thumbs.db
# Testing
.pytest_cache/
.coverage
htmlcov/
celerybeat-schedule
================================================
FILE: backend/README.md
================================================
# FinnewsHunter Backend
Backend service for the financial news intelligent analysis system based on the AgenticX framework.
## Documentation Navigation
### Quick Start
- **[QUICKSTART.md](../QUICKSTART.md)** - Quick start guide (recommended for beginners)
### Configuration Guides
- **[CONFIG_GUIDE.md](CONFIG_GUIDE.md)** - **Unified Configuration Guide** (recommended)
- Single configuration file supports all LLM providers
- Quick switching between OpenAI / Bailian / Proxy
- Includes scenario examples and working principles
- **[env.example](env.example)** - Configuration template (with comments for all scenarios)
### Specialized Configuration
- **[BAILIAN_SETUP.md](BAILIAN_SETUP.md)** - Detailed Alibaba Cloud Bailian configuration (recommended for Chinese users)
- **[API_PROXY_GUIDE.md](API_PROXY_GUIDE.md)** - API proxy configuration guide
---
## Quick Configuration
### Method 1: Interactive Script (Recommended)
```bash
chmod +x setup_env.sh
./setup_env.sh
# Follow the prompts to select:
# 1) OpenAI Official
# 2) Alibaba Cloud Bailian (recommended for Chinese users)
# 3) Other Proxy
# 4) Manual Configuration
```
### Method 2: Manual Configuration
```bash
cp env.example .env
nano .env # Choose configuration scheme according to comments
```
---
## Main Features
- **Multi-Agent System**: Based on AgenticX framework
- NewsAnalyst: News analysis agent
- More agents under development...
- **Data Collection**:
- Sina Finance crawler
- JRJ Finance crawler
- **Storage System**:
- PostgreSQL: Relational data storage
- Milvus: Vector database
- Redis: Cache and task queue
- **LLM Support**:
- OpenAI (GPT-3.5/GPT-4)
- Alibaba Cloud Bailian (Qwen)
- Other OpenAI-compatible services
---
## Project Structure
```
backend/
├── app/
│ ├── agents/ # Agent definitions
│ ├── api/ # FastAPI routes
│ ├── core/ # Core configuration
│ ├── models/ # Data models
│ ├── services/ # Business services
│ ├── storage/ # Storage wrappers
│ └── tools/ # Crawlers and tools
├── logs/ # Log files
├── tests/ # Test files
├── .env # Environment configuration (copy from env.example)
├── env.example # Configuration template
├── requirements.txt # Python dependencies
└── start.sh # Startup script
```
---
## Development Guide
### Start Development Environment
```bash
# 1. Configure environment variables
./setup_env.sh
# 2. Start services (including Docker containers)
./start.sh
```
### Utility Scripts
The project provides some utility scripts located in the `tests/` directory:
```bash
# Check Milvus vector storage data
python tests/check_milvus_data.py
# Check news embedding status
python tests/check_news_embedding_status.py
# Manually vectorize a specific news item (for fixing unvectorized news)
python tests/manual_vectorize.py <news_id>
```
### View Logs
```bash
tail -f logs/finnews.log
```
---
## Common Configuration Scenarios
### OpenAI Official
```bash
LLM_MODEL=gpt-3.5-turbo
OPENAI_API_KEY=sk-openai-key
MILVUS_DIM=1536
```
### Alibaba Cloud Bailian (Recommended for Chinese Users)
```bash
LLM_MODEL=qwen-plus
OPENAI_API_KEY=sk-bailian-key
OPENAI_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
MILVUS_DIM=1024
```
### OpenAI Proxy
```bash
LLM_MODEL=gpt-3.5-turbo
OPENAI_API_KEY=sk-proxy-key
OPENAI_BASE_URL=https://your-proxy.com/v1
MILVUS_DIM=1536
```
For detailed information, see **[CONFIG_GUIDE.md](CONFIG_GUIDE.md)**
---
## API Documentation
- Swagger UI: http://localhost:8000/docs
- ReDoc: http://localhost:8000/redoc
### Troubleshooting
If the documentation page appears blank or keeps loading:
1. **Check Browser Console**: Press F12 to open developer tools, check Console and Network tabs for errors
2. **Try ReDoc**: If Swagger UI fails to load, try accessing ReDoc (uses a different CDN)
3. **Clear Browser Cache**: Press `Ctrl+Shift+R` (Windows/Linux) or `Cmd+Shift+R` (Mac) to force refresh
4. **Check Network Connection**: Documentation pages need to load JavaScript resources from CDN, ensure network connection is normal
5. **Check Backend Service**: Ensure the backend service is running, verify by accessing http://localhost:8000/health
================================================
FILE: backend/README_zn.md
================================================
# FinnewsHunter Backend
基于 AgenticX 框架的金融新闻智能分析系统后端服务。
## 文档导航
### 快速开始
- **[QUICKSTART.md](../QUICKSTART.md)** - 快速启动指南(推荐新手阅读)
### 配置指南
- **[CONFIG_GUIDE.md](CONFIG_GUIDE.md)** - **统一配置指南**(推荐首选)
- 一个配置文件支持所有 LLM 服务商
- 快速切换 OpenAI / 百炼 / 代理
- 包含场景示例和工作原理
- **[env.example](env.example)** - 配置模板(包含所有场景的注释)
### 专项配置
- **[BAILIAN_SETUP.md](BAILIAN_SETUP.md)** - 阿里云百炼详细配置(国内用户推荐)
- **[API_PROXY_GUIDE.md](API_PROXY_GUIDE.md)** - API 代理配置详解
---
## 快速配置
### 方法 1: 交互式脚本(推荐)
```bash
chmod +x setup_env.sh
./setup_env.sh
# 按提示选择:
# 1) OpenAI 官方
# 2) 阿里云百炼(推荐国内用户)
# 3) 其他代理
# 4) 手动配置
```
### 方法 2: 手动配置
```bash
cp env.example .env
nano .env # 根据注释选择配置方案
```
---
## 主要功能
- **多智能体系统**:基于 AgenticX 框架
- NewsAnalyst:新闻分析智能体
- 更多智能体开发中...
- **数据采集**:
- 新浪财经爬虫
- 金融界爬虫
- **存储系统**:
- PostgreSQL:关系数据存储
- Milvus:向量数据库
- Redis:缓存和任务队列
- **LLM 支持**:
- OpenAI (GPT-3.5/GPT-4)
- 阿里云百炼(通义千问)
- 其他 OpenAI 兼容服务
---
## 项目结构
```
backend/
├── app/
│ ├── agents/ # 智能体定义
│ ├── api/ # FastAPI 路由
│ ├── core/ # 核心配置
│ ├── models/ # 数据模型
│ ├── services/ # 业务服务
│ ├── storage/ # 存储封装
│ └── tools/ # 爬虫和工具
├── logs/ # 日志文件
├── tests/ # 测试文件
├── .env # 环境配置(从 env.example 复制)
├── env.example # 配置模板
├── requirements.txt # Python 依赖
└── start.sh # 启动脚本
```
---
## 开发指南
### 启动开发环境
```bash
# 1. 配置环境变量
./setup_env.sh
# 2. 启动服务(包括 Docker 容器)
./start.sh
```
### 工具脚本
项目提供了一些实用工具脚本,位于 `tests/` 目录下:
```bash
# 检查 Milvus 向量存储数据
python tests/check_milvus_data.py
# 检查新闻向量化状态
python tests/check_news_embedding_status.py
# 手动向量化指定新闻(用于修复未向量化的新闻)
python tests/manual_vectorize.py <news_id>
```
### 查看日志
```bash
tail -f logs/finnews.log
```
---
## 常用配置场景
### OpenAI 官方
```bash
LLM_MODEL=gpt-3.5-turbo
OPENAI_API_KEY=sk-openai-key
MILVUS_DIM=1536
```
### 阿里云百炼(推荐国内)
```bash
LLM_MODEL=qwen-plus
OPENAI_API_KEY=sk-bailian-key
OPENAI_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
MILVUS_DIM=1024
```
### OpenAI 代理
```bash
LLM_MODEL=gpt-3.5-turbo
OPENAI_API_KEY=sk-proxy-key
OPENAI_BASE_URL=https://your-proxy.com/v1
MILVUS_DIM=1536
```
详细说明见 **[CONFIG_GUIDE.md](CONFIG_GUIDE.md)**
---
## API 文档
- Swagger UI: http://localhost:8000/docs
- ReDoc: http://localhost:8000/redoc
### 手动触发爬取
如果某个新闻源显示为空,可以手动触发实时爬取:
```bash
# 触发腾讯财经爬取
curl -X POST "http://localhost:8000/api/v1/tasks/realtime" \
-H "Content-Type: application/json" \
-d '{"source": "tencent", "force_refresh": true}'
# 触发经济观察网爬取
curl -X POST "http://localhost:8000/api/v1/tasks/realtime" \
-H "Content-Type: application/json" \
-d '{"source": "eeo", "force_refresh": true}'
```
支持的新闻源:
- `sina` - 新浪财经
- `tencent` - 腾讯财经
- `eeo` - 经济观察网
- `jwview` - 金融界
- `caijing` - 财经网
- `jingji21` - 21经济网
- `nbd` - 每日经济新闻
- `yicai` - 第一财经
- `163` - 网易财经
- `eastmoney` - 东方财富
### 故障排查
如果文档页面显示空白或一直加载:
1. **检查浏览器控制台**:按 F12 打开开发者工具,查看 Console 和 Network 标签页是否有错误
2. **尝试 ReDoc**:如果 Swagger UI 无法加载,尝试访问 ReDoc(使用不同的 CDN)
3. **清除浏览器缓存**:按 `Ctrl+Shift+R` (Windows/Linux) 或 `Cmd+Shift+R` (Mac) 强制刷新
4. **检查网络连接**:文档页面需要从 CDN 加载 JavaScript 资源,确保网络连接正常
5. **检查后端服务**:确保后端服务正在运行,可以访问 http://localhost:8000/health 验证
================================================
FILE: backend/add_raw_html_column.py
================================================
"""
数据库迁移:添加 raw_html 字段
"""
import os
from pathlib import Path
from dotenv import load_dotenv
# 加载环境变量
env_path = Path(__file__).parent / ".env"
load_dotenv(env_path)
# 构建数据库 URL
POSTGRES_USER = os.getenv("POSTGRES_USER", "postgres")
POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD", "postgres")
POSTGRES_HOST = os.getenv("POSTGRES_HOST", "localhost")
POSTGRES_PORT = os.getenv("POSTGRES_PORT", "5432")
POSTGRES_DB = os.getenv("POSTGRES_DB", "finnews_db")
DATABASE_URL = f"postgresql://{POSTGRES_USER}:{POSTGRES_PASSWORD}@{POSTGRES_HOST}:{POSTGRES_PORT}/{POSTGRES_DB}"
from sqlalchemy import create_engine, text
def add_raw_html_column():
    """Add the ``raw_html`` column to the ``news`` table (idempotent)."""
    print("🔧 正在添加 raw_html 字段...")
    engine = create_engine(DATABASE_URL)
    with engine.connect() as conn:
        # Probe the information schema; bail out if the column exists already.
        existing = conn.execute(text("""
            SELECT column_name FROM information_schema.columns
            WHERE table_name = 'news' AND column_name = 'raw_html'
        """)).fetchone()
        if existing is not None:
            print("✅ raw_html 字段已存在,无需迁移")
            return
        # Column is missing: run the DDL and commit the transaction.
        conn.execute(text("""
            ALTER TABLE news ADD COLUMN raw_html TEXT
        """))
        conn.commit()
        print("✅ raw_html 字段已添加成功!")
if __name__ == "__main__":
print("=" * 50)
print("📦 数据库迁移:添加 raw_html 字段")
print("=" * 50)
add_raw_html_column()
================================================
FILE: backend/app/__init__.py
================================================
"""
FinnewsHunter Backend Application
"""
__version__ = "0.1.0"
================================================
FILE: backend/app/agents/__init__.py
================================================
"""
智能体模块
"""
from .news_analyst import NewsAnalystAgent, create_news_analyst
from .debate_agents import (
BullResearcherAgent,
BearResearcherAgent,
InvestmentManagerAgent,
DebateWorkflow,
create_debate_workflow,
)
from .data_collector_v2 import DataCollectorAgentV2, QuickAnalystAgent, create_data_collector
from .orchestrator import DebateOrchestrator, create_orchestrator
from .quantitative_agent import QuantitativeAgent, create_quantitative_agent
__all__ = [
"NewsAnalystAgent",
"create_news_analyst",
"BullResearcherAgent",
"BearResearcherAgent",
"InvestmentManagerAgent",
"DebateWorkflow",
"create_debate_workflow",
"DataCollectorAgentV2",
"QuickAnalystAgent",
"create_data_collector",
"DebateOrchestrator",
"create_orchestrator",
"QuantitativeAgent",
"create_quantitative_agent",
]
================================================
FILE: backend/app/agents/data_collector.py
================================================
"""
数据专员智能体
负责在辩论前搜集和整理相关数据资料,包括:
- 新闻数据(从数据库或BochaAI搜索)
- 财务数据(从AkShare获取)
- 行情数据(实时行情、K线等)
"""
import logging
from typing import Dict, Any, List, Optional
from datetime import datetime
from agenticx.core.agent import Agent
from ..services.llm_service import get_llm_provider
logger = logging.getLogger(__name__)
class DataCollectorAgent(Agent):
    """Data-collector agent: fetches news, financial indicators, fund-flow
    and quote data for a stock, then formats a text summary for the debate."""

    def __init__(self, llm_provider=None, organization_id: str = "finnews"):
        super().__init__(
            name="DataCollector",
            role="数据专员",
            goal="搜集和整理股票相关的新闻、财务和行情数据,为辩论提供全面的信息支持",
            backstory="""你是一位专业的金融数据分析师,擅长从多个数据源搜集和整理信息。
你的职责是在辩论开始前,为Bull/Bear研究员提供全面、准确、及时的数据支持。
你需要:
1. 搜集最新的相关新闻
2. 获取关键财务指标
3. 分析资金流向
4. 整理行情数据
你的工作质量直接影响辩论的深度和专业性。""",
            organization_id=organization_id
        )
        if llm_provider is None:
            llm_provider = get_llm_provider()
        # Set via object.__setattr__ to bypass the pydantic field machinery
        # of the Agent base class (a plain assignment would be rejected/cleared).
        object.__setattr__(self, '_llm_provider', llm_provider)
        logger.info(f"Initialized {self.name} agent")

    async def collect_data(
        self,
        stock_code: str,
        stock_name: str,
        data_requirements: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Collect all data related to one stock.

        Args:
            stock_code: stock ticker code
            stock_name: display name of the stock
            data_requirements: optional configuration (unused in the visible code)

        Returns:
            Dict with keys: stock_code, stock_name, collected_at, news,
            financial, fund_flow, realtime_quote, summary (plus "error" on failure).
        """
        logger.info(f"📊 DataCollector: 开始搜集 {stock_name}({stock_code}) 的数据...")
        result = {
            "stock_code": stock_code,
            "stock_name": stock_name,
            "collected_at": datetime.utcnow().isoformat(),
            "news": [],
            "financial": {},
            "fund_flow": {},
            "realtime_quote": {},
            "summary": ""
        }
        try:
            # 1. News.
            news_data = await self._collect_news(stock_code, stock_name)
            result["news"] = news_data
            logger.info(f"📰 DataCollector: 搜集到 {len(news_data)} 条新闻")
            # 2. Financial indicators.
            financial_data = await self._collect_financial(stock_code)
            result["financial"] = financial_data
            logger.info(f"💰 DataCollector: 搜集到财务数据")
            # 3. Fund flow.
            fund_flow = await self._collect_fund_flow(stock_code)
            result["fund_flow"] = fund_flow
            logger.info(f"💸 DataCollector: 搜集到资金流向数据")
            # 4. Real-time quote.
            realtime = await self._collect_realtime_quote(stock_code)
            result["realtime_quote"] = realtime
            logger.info(f"📈 DataCollector: 搜集到实时行情")
            # 5. Human-readable summary of everything above.
            result["summary"] = await self._generate_summary(result)
            logger.info(f"📋 DataCollector: 数据摘要生成完成")
        except Exception as e:
            # Partial results are still returned; the error is recorded inline.
            logger.error(f"DataCollector 搜集数据时出错: {e}", exc_info=True)
            result["error"] = str(e)
        return result

    async def _collect_news(self, stock_code: str, stock_name: str) -> List[Dict[str, Any]]:
        """Fetch up to 20 stored news items for the stock; [] on failure."""
        from ..services.news_service import news_service
        try:
            # Pull already-crawled news from the database.
            news_list = await news_service.get_news_by_stock(stock_code, limit=20)
            # NOTE(review): this reads news.published_at / news.sentiment, while
            # data_collector_v2 reads news.publish_time — confirm the model's
            # actual attribute names.
            return [
                {
                    "title": news.title,
                    "content": news.content[:500] if news.content else "",
                    "source": news.source,
                    "published_at": news.published_at.isoformat() if news.published_at else None,
                    "sentiment": news.sentiment
                }
                for news in news_list
            ]
        except Exception as e:
            logger.warning(f"从数据库获取新闻失败: {e}")
            return []

    async def _collect_financial(self, stock_code: str) -> Dict[str, Any]:
        """Fetch financial indicators via stock_data_service; {} on failure."""
        from ..services.stock_data_service import stock_data_service
        try:
            return await stock_data_service.get_financial_indicators(stock_code) or {}
        except Exception as e:
            logger.warning(f"获取财务数据失败: {e}")
            return {}

    async def _collect_fund_flow(self, stock_code: str) -> Dict[str, Any]:
        """Fetch fund-flow data via stock_data_service; {} on failure."""
        from ..services.stock_data_service import stock_data_service
        try:
            return await stock_data_service.get_fund_flow(stock_code) or {}
        except Exception as e:
            logger.warning(f"获取资金流向失败: {e}")
            return {}

    async def _collect_realtime_quote(self, stock_code: str) -> Dict[str, Any]:
        """Fetch the real-time quote via stock_data_service; {} on failure."""
        from ..services.stock_data_service import stock_data_service
        try:
            return await stock_data_service.get_realtime_quote(stock_code) or {}
        except Exception as e:
            logger.warning(f"获取实时行情失败: {e}")
            return {}

    async def _generate_summary(self, data: Dict[str, Any]) -> str:
        """Format a markdown summary of the collected data.

        NOTE(review): despite the original docstring ("use LLM to generate
        a summary"), this method only fills string templates and never calls
        the LLM provider.
        """
        try:
            # News section: first five titles only.
            news_summary = ""
            if data.get("news"):
                news_titles = [n["title"] for n in data["news"][:5]]
                news_summary = f"最新新闻({len(data['news'])}条):\n" + "\n".join(f"- {t}" for t in news_titles)
            financial_summary = ""
            if data.get("financial"):
                f = data["financial"]
                financial_summary = f"""财务指标:
- PE: {f.get('pe', 'N/A')}
- PB: {f.get('pb', 'N/A')}
- ROE: {f.get('roe', 'N/A')}
- 净利润增长率: {f.get('net_profit_growth', 'N/A')}"""
            fund_flow_summary = ""
            if data.get("fund_flow"):
                ff = data["fund_flow"]
                fund_flow_summary = f"""资金流向:
- 主力净流入: {ff.get('main_net_inflow', 'N/A')}
- 散户净流入: {ff.get('retail_net_inflow', 'N/A')}"""
            realtime_summary = ""
            if data.get("realtime_quote"):
                rt = data["realtime_quote"]
                realtime_summary = f"""实时行情:
- 当前价: {rt.get('price', 'N/A')}
- 涨跌幅: {rt.get('change_pct', 'N/A')}%
- 成交量: {rt.get('volume', 'N/A')}"""
            summary = f"""## {data['stock_name']}({data['stock_code']}) 数据摘要
{realtime_summary}
{financial_summary}
{fund_flow_summary}
{news_summary}
数据搜集时间: {data['collected_at']}"""
            return summary
        except Exception as e:
            logger.error(f"生成数据摘要失败: {e}")
            return f"数据搜集完成,但生成摘要时出错: {e}"

    async def analyze_data_quality(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Score data completeness out of 100 (news 30, financial 25,
        fund-flow 20, real-time quote 25) and collect recommendations."""
        quality = {
            "score": 0,
            "max_score": 100,
            "details": [],
            "recommendations": []
        }
        # News: tiered score by item count (>=10: 30, >=5: 20, >0: 10, else 0).
        news_count = len(data.get("news", []))
        if news_count >= 10:
            quality["score"] += 30
            quality["details"].append(f"✅ 新闻数据充足({news_count}条)")
        elif news_count >= 5:
            quality["score"] += 20
            quality["details"].append(f"⚠️ 新闻数据较少({news_count}条)")
            quality["recommendations"].append("建议搜集更多新闻以支持分析")
        elif news_count > 0:
            quality["score"] += 10
            quality["details"].append(f"⚠️ 新闻数据不足({news_count}条)")
            quality["recommendations"].append("新闻数据偏少,分析可能不够全面")
        else:
            quality["details"].append("❌ 无新闻数据")
            quality["recommendations"].append("缺少新闻数据,建议先进行定向爬取")
        # Financial indicators: all-or-nothing 25 points.
        if data.get("financial"):
            quality["score"] += 25
            quality["details"].append("✅ 财务数据完整")
        else:
            quality["details"].append("❌ 缺少财务数据")
            quality["recommendations"].append("无法获取财务指标")
        # Fund flow: 20 points.
        if data.get("fund_flow"):
            quality["score"] += 20
            quality["details"].append("✅ 资金流向数据完整")
        else:
            quality["details"].append("⚠️ 缺少资金流向数据")
        # Real-time quote: 25 points.
        if data.get("realtime_quote"):
            quality["score"] += 25
            quality["details"].append("✅ 实时行情数据完整")
        else:
            quality["details"].append("⚠️ 缺少实时行情数据")
        return quality
# Quick analyst (used by the quick-analysis mode)
class QuickAnalystAgent(Agent):
    """Quick-analysis agent: produces a short, single-shot investment view."""

    def __init__(self, llm_provider=None, organization_id: str = "finnews"):
        super().__init__(
            name="QuickAnalyst",
            role="快速分析师",
            goal="快速综合多角度给出投资建议",
            backstory="""你是一位经验丰富的量化分析师,擅长快速分析和决策。
你能够在短时间内综合考虑多空因素,给出简洁明了的投资建议。
你的分析风格是:快速、准确、实用。""",
            organization_id=organization_id
        )
        if llm_provider is None:
            llm_provider = get_llm_provider()
        # Bypass pydantic field handling on the Agent base class.
        object.__setattr__(self, '_llm_provider', llm_provider)
        logger.info(f"Initialized {self.name} agent")

    async def quick_analyze(
        self,
        stock_code: str,
        stock_name: str,
        context: str
    ) -> Dict[str, Any]:
        """Run a single LLM call producing a concise buy/hold/sell view.

        Returns {"success": True, "analysis": ..., "timestamp": ...} or
        {"success": False, "error": ...}.
        """
        # Current wall-clock time is injected into the prompt.
        current_time = datetime.now().strftime("%Y年%m月%d日 %H:%M")
        prompt = f"""请对 {stock_name}({stock_code}) 进行快速投资分析。
【当前时间】
{current_time}
背景资料:
{context}
请在1分钟内给出:
1. 核心观点(一句话)
2. 看多因素(3点)
3. 看空因素(3点)
4. 投资建议(买入/持有/卖出)
5. 目标价位和止损价位
请用简洁的语言,直接给出结论。"""
        try:
            # NOTE(review): this v1 agent awaits provider.chat(), while the v2
            # agents call provider.invoke([...]) synchronously — confirm the
            # provider returned by get_llm_provider() exposes an async chat().
            response = await self._llm_provider.chat(prompt)
            return {
                "success": True,
                "analysis": response,
                "timestamp": datetime.utcnow().isoformat()
            }
        except Exception as e:
            logger.error(f"Quick analysis failed: {e}")
            return {
                "success": False,
                "error": str(e)
            }
================================================
FILE: backend/app/agents/data_collector_v2.py
================================================
"""
数据专员智能体 V2 (DataCollectorAgent)
统一负责所有数据获取任务,支持:
- 辩论前的初始数据收集
- 辩论中的动态数据补充
- 用户追问时的按需搜索
核心特性:
1. 计划/执行分离:先生成搜索计划,用户确认后再执行
2. 多数据源支持:AkShare、BochaAI、网页搜索、知识库
3. 智能意图识别:根据用户问题自动选择数据源
"""
import logging
import re
import asyncio
from typing import Dict, Any, List, Optional, ClassVar, Pattern
from datetime import datetime
from enum import Enum
from pydantic import BaseModel, Field
from agenticx.core.agent import Agent
from ..services.llm_service import get_llm_provider
from ..services.stock_data_service import stock_data_service
from ..tools.bochaai_search import bochaai_search, SearchResult
from ..tools.interactive_crawler import InteractiveCrawler
logger = logging.getLogger(__name__)
class SearchSource(str, Enum):
    """Enumeration of the data sources a search task may target."""

    AKSHARE = "akshare"        # AkShare financial / market data
    BOCHAAI = "bochaai"        # BochaAI web search
    BROWSER = "browser"        # interactive browser search
    KNOWLEDGE_BASE = "kb"      # internal knowledge base
    ALL = "all"                # every available source
class SearchTask(BaseModel):
    """A single search task: one query against one data source."""
    # Unique task identifier.
    id: str = Field(..., description="任务ID")
    # Which backend to query.
    source: SearchSource = Field(..., description="数据源")
    # The query string sent to that backend.
    query: str = Field(..., description="搜索查询")
    # Human-readable description shown to the user for plan confirmation.
    description: str = Field("", description="任务描述(用于展示给用户)")
    # Data category, e.g. "financial", "news", "kline".
    data_type: Optional[str] = Field(None, description="数据类型(如 financial, news, kline)")
    # Emoji used by the UI for this task.
    icon: str = Field("🔍", description="图标(用于UI展示)")
    # Rough duration estimate in seconds (for the plan's total).
    estimated_time: int = Field(3, description="预计耗时(秒)")
class SearchPlan(BaseModel):
    """A confirmable plan: the ordered list of search tasks for one question."""
    plan_id: str = Field(..., description="计划ID")
    stock_code: str = Field(..., description="股票代码")
    stock_name: str = Field("", description="股票名称")
    # The user's original question that triggered this plan.
    user_query: str = Field(..., description="用户原始问题")
    tasks: List[SearchTask] = Field(default_factory=list, description="搜索任务列表")
    # Sum of the per-task estimates, filled in by generate_search_plan().
    total_estimated_time: int = Field(0, description="总预计耗时(秒)")
    created_at: str = Field(default_factory=lambda: datetime.utcnow().isoformat())
    # Lifecycle: pending -> confirmed -> executing -> completed / cancelled.
    status: str = Field("pending", description="状态:pending, confirmed, executing, completed, cancelled")
# NOTE(review): this model shadows the SearchResult imported from
# ..tools.bochaai_search at the top of the module — consider renaming one.
class SearchResult(BaseModel):
    """Outcome of a single executed SearchTask."""
    task_id: str
    # SearchSource value as a plain string.
    source: str
    success: bool
    # Raw payload returned by the backend (shape varies per source).
    data: Dict[str, Any] = Field(default_factory=dict)
    summary: str = ""
    error: Optional[str] = None
    # Wall-clock duration of the task in seconds.
    execution_time: float = 0
class DataCollectorAgentV2(Agent):
    """
    Data-collector agent V2.

    "Confirm-first" workflow:
    1. the user @-mentions the data collector with a question
    2. a search plan is generated (but not executed)
    3. the user confirms
    4. the plan is executed and results are returned
    """

    # Keyword -> (source, data_type, icon) routing table for plan generation.
    # NOTE(review): matching is a plain substring test on the lower-cased
    # query, so short keys such as "pe"/"pb" can also match inside unrelated
    # words — confirm that is acceptable.
    KEYWORD_SOURCE_MAP: ClassVar[Dict[str, tuple]] = {
        # financial metrics -> AkShare
        "财务": (SearchSource.AKSHARE, "financial", "📊"),
        "pe": (SearchSource.AKSHARE, "financial", "📊"),
        "pb": (SearchSource.AKSHARE, "financial", "📊"),
        "roe": (SearchSource.AKSHARE, "financial", "📊"),
        "利润": (SearchSource.AKSHARE, "financial", "📊"),
        "营收": (SearchSource.AKSHARE, "financial", "📊"),
        "估值": (SearchSource.AKSHARE, "financial", "📊"),
        "市盈": (SearchSource.AKSHARE, "financial", "📊"),
        "市净": (SearchSource.AKSHARE, "financial", "📊"),
        "报表": (SearchSource.AKSHARE, "financial", "📊"),
        # fund flow / quotes -> AkShare
        "资金": (SearchSource.AKSHARE, "fund_flow", "💰"),
        "主力": (SearchSource.AKSHARE, "fund_flow", "💰"),
        "流入": (SearchSource.AKSHARE, "fund_flow", "💰"),
        "流出": (SearchSource.AKSHARE, "fund_flow", "💰"),
        "行情": (SearchSource.AKSHARE, "realtime", "📈"),
        "价格": (SearchSource.AKSHARE, "realtime", "📈"),
        "涨跌": (SearchSource.AKSHARE, "realtime", "📈"),
        "k线": (SearchSource.AKSHARE, "kline", "📈"),
        "走势": (SearchSource.AKSHARE, "kline", "📈"),
        # news -> BochaAI
        "新闻": (SearchSource.BOCHAAI, "news", "📰"),
        "资讯": (SearchSource.BOCHAAI, "news", "📰"),
        "报道": (SearchSource.BOCHAAI, "news", "📰"),
        "公告": (SearchSource.BOCHAAI, "news", "📰"),
        "消息": (SearchSource.BOCHAAI, "news", "📰"),
        # supply chain / industry -> browser search
        "上下游": (SearchSource.BROWSER, "industry", "🔗"),
        "供应链": (SearchSource.BROWSER, "industry", "🔗"),
        "客户": (SearchSource.BROWSER, "industry", "🔗"),
        "供应商": (SearchSource.BROWSER, "industry", "🔗"),
        "合作": (SearchSource.BROWSER, "industry", "🔗"),
        "产业链": (SearchSource.BROWSER, "industry", "🔗"),
    }

    def __init__(self, llm_provider=None, organization_id: str = "finnews"):
        super().__init__(
            name="DataCollector",
            role="数据专员",
            goal="根据用户需求,从多个数据源搜集和整理相关信息,支持辩论前准备和辩论中追问",
            backstory="""你是一位专业的金融数据专家,精通各类金融数据源的使用。
你的职责是:
1. 理解用户的数据需求
2. 制定合理的搜索计划
3. 从多个数据源获取数据
4. 整理并格式化数据
你能够访问的数据源包括:
- AkShare: 股票财务指标、K线行情、资金流向等
- BochaAI: 实时新闻搜索、财经报道
- 网页搜索: 百度资讯、搜狗等
- 知识库: 历史新闻和分析数据""",
            organization_id=organization_id
        )
        if llm_provider is None:
            llm_provider = get_llm_provider()
        # Bypass pydantic field handling on the Agent base class.
        object.__setattr__(self, '_llm_provider', llm_provider)
        # Crawler used for the BROWSER source (runs in an executor).
        self._interactive_crawler = InteractiveCrawler(timeout=20)
        logger.info(f"✅ Initialized DataCollectorV2 with multi-source search capabilities")

    async def generate_search_plan(
        self,
        query: str,
        stock_code: str,
        stock_name: str = ""
    ) -> SearchPlan:
        """
        Build a search plan WITHOUT executing it.

        Decides which data sources are needed for the user's question.

        Args:
            query: user question
            stock_code: stock ticker code
            stock_name: display name (falls back to the code)

        Returns:
            A SearchPlan in "pending" status awaiting confirmation.
        """
        logger.info(f"📋 DataCollector: 为 '{query}' 生成搜索计划...")
        plan_id = f"plan_{datetime.utcnow().strftime('%Y%m%d%H%M%S')}_{stock_code}"
        plan = SearchPlan(
            plan_id=plan_id,
            stock_code=stock_code,
            stock_name=stock_name or stock_code,
            user_query=query,
            tasks=[],
            status="pending"
        )
        query_lower = query.lower()
        # 1. Keyword matching — at most one task per (source, data_type) pair.
        matched_sources = set()
        for keyword, (source, data_type, icon) in self.KEYWORD_SOURCE_MAP.items():
            if keyword in query_lower:
                if (source, data_type) not in matched_sources:
                    matched_sources.add((source, data_type))
                    task = self._create_task(
                        source=source,
                        data_type=data_type,
                        icon=icon,
                        query=query,
                        stock_code=stock_code,
                        stock_name=stock_name
                    )
                    plan.tasks.append(task)
        # 2. No keyword hit: ask the LLM which data is needed.
        if not plan.tasks:
            plan.tasks = await self._analyze_with_llm(query, stock_code, stock_name)
        # 3. Still nothing: fall back to generic news + overview tasks.
        if not plan.tasks:
            plan.tasks = [
                SearchTask(
                    id=f"task_{plan_id}_1",
                    source=SearchSource.BOCHAAI,
                    query=f"{stock_name or stock_code} {query}",
                    description=f"搜索 {stock_name} 相关新闻",
                    icon="📰",
                    estimated_time=3
                ),
                SearchTask(
                    id=f"task_{plan_id}_2",
                    source=SearchSource.AKSHARE,
                    query=query,
                    description="获取最新财务和行情数据",
                    data_type="overview",
                    icon="📊",
                    estimated_time=2
                )
            ]
        # Aggregate per-task estimates for the UI.
        plan.total_estimated_time = sum(t.estimated_time for t in plan.tasks)
        logger.info(f"✅ 生成搜索计划: {len(plan.tasks)} 个任务,预计耗时 {plan.total_estimated_time}s")
        return plan

    def _create_task(
        self,
        source: SearchSource,
        data_type: str,
        icon: str,
        query: str,
        stock_code: str,
        stock_name: str
    ) -> SearchTask:
        """Build one SearchTask, choosing a description and query per data type."""
        task_id = f"task_{datetime.utcnow().strftime('%H%M%S%f')}"
        # User-facing descriptions per data type.
        descriptions = {
            "financial": f"获取 {stock_name or stock_code} 财务指标(PE/PB/ROE等)",
            "fund_flow": f"获取 {stock_name or stock_code} 资金流向(主力/散户)",
            "realtime": f"获取 {stock_name or stock_code} 实时行情",
            "kline": f"获取 {stock_name or stock_code} K线走势",
            "news": f"搜索 {stock_name or stock_code} 最新新闻",
            "industry": f"搜索 {stock_name or stock_code} 产业链/上下游信息",
        }
        # AkShare endpoints want the bare stock code; text search gets name+query.
        queries = {
            "financial": stock_code,
            "fund_flow": stock_code,
            "realtime": stock_code,
            "kline": stock_code,
            "news": f"{stock_name or stock_code} {query}",
            "industry": f"{stock_name or stock_code} {query}",
        }
        return SearchTask(
            id=task_id,
            source=source,
            query=queries.get(data_type, query),
            description=descriptions.get(data_type, f"搜索: {query}"),
            data_type=data_type,
            icon=icon,
            # Browser searches are slower than API calls.
            estimated_time=3 if source != SearchSource.BROWSER else 5
        )

    async def _analyze_with_llm(
        self,
        query: str,
        stock_code: str,
        stock_name: str
    ) -> List[SearchTask]:
        """Ask the LLM which sources/queries a free-form question requires.

        The LLM replies with "SOURCE:x|TYPE:y|QUERY:z|DESC:w" lines, parsed
        below. Returns [] on any failure.
        """
        try:
            prompt = f"""分析以下用户问题,判断需要搜索哪些数据:
用户问题: "{query}"
股票: {stock_name}({stock_code})
可用数据源:
1. akshare - 财务数据(PE/PB/ROE等)、资金流向、实时行情、K线
2. bochaai - 新闻搜索、财经报道
3. browser - 网页搜索(适合搜索产业链、上下游、合作方等)
4. kb - 历史新闻数据库
请返回需要搜索的内容,格式如下(每行一个):
SOURCE:数据源|TYPE:数据类型|QUERY:搜索词|DESC:描述
示例:
SOURCE:bochaai|TYPE:news|QUERY:ST国华 上下游|DESC:搜索ST国华上下游相关新闻
SOURCE:akshare|TYPE:financial|QUERY:002074|DESC:获取国轩高科财务数据
只输出2-4个最相关的搜索任务。"""
            response = self._llm_provider.invoke([
                {"role": "system", "content": "你是数据搜索专家,帮助分析需要哪些数据。"},
                {"role": "user", "content": prompt}
            ])
            content = response.content if hasattr(response, 'content') else str(response)
            tasks = []
            # Parse each KEY:value|KEY:value line of the reply.
            for line in content.strip().split('\n'):
                if 'SOURCE:' in line:
                    try:
                        parts = {}
                        for part in line.split('|'):
                            if ':' in part:
                                key, value = part.split(':', 1)
                                parts[key.strip().upper()] = value.strip()
                        if 'SOURCE' in parts:
                            source_str = parts['SOURCE'].lower()
                            # Unknown source names fall back to BochaAI.
                            source = SearchSource(source_str) if source_str in [s.value for s in SearchSource] else SearchSource.BOCHAAI
                            tasks.append(SearchTask(
                                id=f"task_llm_{len(tasks)+1}",
                                source=source,
                                query=parts.get('QUERY', query),
                                description=parts.get('DESC', f"搜索: {query}"),
                                data_type=parts.get('TYPE', 'general'),
                                icon=self._get_icon_for_source(source),
                                estimated_time=3
                            ))
                    except Exception as e:
                        # A malformed line is skipped, not fatal.
                        logger.debug(f"解析 LLM 响应行失败: {e}")
            return tasks
        except Exception as e:
            logger.warning(f"LLM 分析失败: {e}")
            return []

    def _get_icon_for_source(self, source: SearchSource) -> str:
        """Map a data source to its UI emoji (default magnifying glass)."""
        icons = {
            SearchSource.AKSHARE: "📊",
            SearchSource.BOCHAAI: "📰",
            SearchSource.BROWSER: "🌐",
            SearchSource.KNOWLEDGE_BASE: "📚",
            SearchSource.ALL: "🔍"
        }
        return icons.get(source, "🔍")

    async def execute_search_plan(
        self,
        plan: SearchPlan
    ) -> Dict[str, Any]:
        """
        Execute a confirmed search plan.

        All tasks run concurrently via asyncio.gather; failures of
        individual tasks are recorded, not raised.

        Args:
            plan: a confirmed SearchPlan

        Returns:
            Aggregated results: per-task results, merged data per source,
            a markdown summary, success flag and total execution time.
        """
        logger.info(f"🚀 DataCollector: 开始执行搜索计划 {plan.plan_id}...")
        plan.status = "executing"
        start_time = datetime.utcnow()
        results = {
            "plan_id": plan.plan_id,
            "stock_code": plan.stock_code,
            "stock_name": plan.stock_name,
            "user_query": plan.user_query,
            "task_results": [],
            "combined_data": {},
            "summary": "",
            "success": False,
            "execution_time": 0
        }
        # Run every task in parallel.
        async_tasks = []
        for task in plan.tasks:
            async_tasks.append(self._execute_task(task, plan.stock_code, plan.stock_name))
        task_results = await asyncio.gather(*async_tasks, return_exceptions=True)
        # Collect per-task outcomes; exceptions become failed SearchResults.
        for i, result in enumerate(task_results):
            if isinstance(result, Exception):
                logger.error(f"任务执行失败: {result}")
                # NOTE(review): .dict() is the pydantic v1 API (model_dump() in v2)
                # — confirm the pinned pydantic version.
                results["task_results"].append(SearchResult(
                    task_id=plan.tasks[i].id,
                    source=plan.tasks[i].source.value,
                    success=False,
                    error=str(result)
                ).dict())
            else:
                # _execute_task returns a plain dict, so .get() below is safe.
                results["task_results"].append(result.dict() if hasattr(result, 'dict') else result)
                if result.get("success"):
                    # Merge successful payloads, grouped by source name.
                    source = result.get("source", "unknown")
                    if source not in results["combined_data"]:
                        results["combined_data"][source] = {}
                    results["combined_data"][source].update(result.get("data", {}))
        # Build the markdown summary over the merged data.
        results["summary"] = await self._generate_combined_summary(
            plan.user_query,
            results["combined_data"],
            plan.stock_name
        )
        # Total wall-clock time; success if any task succeeded.
        end_time = datetime.utcnow()
        results["execution_time"] = (end_time - start_time).total_seconds()
        results["success"] = any(r.get("success") for r in results["task_results"])
        plan.status = "completed"
        logger.info(f"✅ 搜索计划执行完成,耗时 {results['execution_time']:.1f}s")
        return results

    async def _execute_task(
        self,
        task: SearchTask,
        stock_code: str,
        stock_name: str
    ) -> Dict[str, Any]:
        """Dispatch one task to its backend; returns a result dict, never raises."""
        logger.info(f"🔍 执行任务: {task.description}")
        start_time = datetime.utcnow()
        result = {
            "task_id": task.id,
            "source": task.source.value,
            "success": False,
            "data": {},
            "summary": "",
            "execution_time": 0
        }
        try:
            if task.source == SearchSource.AKSHARE:
                data = await self._search_akshare(task.query, stock_code, task.data_type)
                result["data"] = data or {}
                result["success"] = bool(data)
            elif task.source == SearchSource.BOCHAAI:
                data = await self._search_bochaai(task.query, stock_name)
                result["data"] = data or {}
                result["success"] = bool(data)
            elif task.source == SearchSource.BROWSER:
                data = await self._search_browser(task.query)
                result["data"] = data or {}
                result["success"] = bool(data)
            elif task.source == SearchSource.KNOWLEDGE_BASE:
                data = await self._search_knowledge_base(task.query, stock_code)
                result["data"] = data or {}
                result["success"] = bool(data)
        except Exception as e:
            logger.error(f"任务 {task.id} 执行失败: {e}")
            result["error"] = str(e)
        end_time = datetime.utcnow()
        result["execution_time"] = (end_time - start_time).total_seconds()
        return result

    async def _search_akshare(
        self,
        query: str,
        stock_code: str,
        data_type: Optional[str] = None
    ) -> Optional[Dict[str, Any]]:
        """Fetch AkShare data for the requested type ("overview" fetches
        financial + fund-flow + realtime). Returns None on error/empty."""
        data = {}
        try:
            if data_type == "financial" or data_type == "overview":
                financial = await stock_data_service.get_financial_indicators(stock_code)
                if financial:
                    data["financial_indicators"] = financial
            if data_type == "fund_flow" or data_type == "overview":
                fund_flow = await stock_data_service.get_fund_flow(stock_code, days=10)
                if fund_flow:
                    data["fund_flow"] = fund_flow
            if data_type == "realtime" or data_type == "overview":
                realtime = await stock_data_service.get_realtime_quote(stock_code)
                if realtime:
                    data["realtime_quote"] = realtime
            if data_type == "kline":
                kline = await stock_data_service.get_kline_data(stock_code, period="daily", limit=30)
                if kline:
                    # Keep only latest bar and the last five for the summary.
                    data["kline_summary"] = {
                        "period": "daily",
                        "count": len(kline),
                        "latest": kline[-1] if kline else None,
                        "recent_5": kline[-5:] if len(kline) >= 5 else kline
                    }
            if data:
                logger.info(f"✅ AkShare 返回数据: {list(data.keys())}")
                return data
        except Exception as e:
            logger.warning(f"AkShare 搜索出错: {e}")
        return None

    async def _search_bochaai(
        self,
        query: str,
        stock_name: Optional[str] = None
    ) -> Optional[Dict[str, Any]]:
        """Search BochaAI for up to 10 news items from the last week.
        Returns None when BochaAI is unconfigured, errors, or finds nothing."""
        if not bochaai_search.is_available():
            logger.debug("BochaAI 未配置,跳过")
            return None
        try:
            results = bochaai_search.search(
                query=query,
                freshness="oneWeek",
                count=10
            )
            if results:
                news_list = [
                    {
                        "title": r.title,
                        "snippet": r.snippet[:200] if r.snippet else "",
                        "url": r.url,
                        "source": r.site_name or "unknown",
                        "date": r.date_published or ""
                    }
                    for r in results
                ]
                logger.info(f"✅ BochaAI 返回 {len(news_list)} 条新闻")
                return {"news": news_list, "count": len(news_list)}
        except Exception as e:
            logger.warning(f"BochaAI 搜索出错: {e}")
        return None

    async def _search_browser(self, query: str) -> Optional[Dict[str, Any]]:
        """Run the (blocking) interactive crawler in an executor against
        Baidu News + Sogou. Returns None on error or no results."""
        try:
            loop = asyncio.get_event_loop()
            results = await loop.run_in_executor(
                None,
                lambda: self._interactive_crawler.interactive_search(
                    query=query,
                    engines=["baidu_news", "sogou"],
                    num_results=10,
                    search_type="news"
                )
            )
            if results:
                news_list = [
                    {
                        "title": r.get("title", ""),
                        "snippet": r.get("snippet", "")[:200],
                        "url": r.get("url", ""),
                        "source": "browser_search"
                    }
                    for r in results
                ]
                logger.info(f"✅ Browser 返回 {len(news_list)} 条结果")
                return {"search_results": news_list, "count": len(news_list)}
        except Exception as e:
            logger.warning(f"Browser 搜索出错: {e}")
        return None

    async def _search_knowledge_base(
        self,
        query: str,
        stock_code: str
    ) -> Optional[Dict[str, Any]]:
        """Fetch up to 10 historical news items from the internal database.
        Returns None on error or when nothing is stored."""
        try:
            from ..services.news_service import news_service
            if stock_code and news_service:
                news_list = await news_service.get_news_by_stock(stock_code, limit=10)
                if news_list:
                    # getattr/hasattr guards: the News model's attribute names
                    # are not assumed here (v1 reads published_at instead).
                    kb_news = [
                        {
                            "title": getattr(news, 'title', ''),
                            "content": (getattr(news, 'content', '') or '')[:300],
                            "source": getattr(news, 'source', ''),
                            "date": news.publish_time.isoformat() if hasattr(news, 'publish_time') and news.publish_time else ""
                        }
                        for news in news_list
                    ]
                    logger.info(f"✅ KB 返回 {len(kb_news)} 条历史新闻")
                    return {"historical_news": kb_news, "count": len(kb_news)}
        except Exception as e:
            logger.debug(f"KB 搜索出错: {e}")
        return None

    async def _generate_combined_summary(
        self,
        query: str,
        data: Dict[str, Any],
        stock_name: str
    ) -> str:
        """Format the merged per-source data into a markdown summary.
        Purely string templating — no LLM call is made here."""
        summary_parts = [f"## 搜索结果: {query}\n"]
        summary_parts.append(f"**股票**: {stock_name}\n")
        # AkShare section: financial / quote / fund-flow highlights.
        if "akshare" in data:
            ak_data = data["akshare"]
            summary_parts.append("### 📊 财务/行情数据\n")
            if "financial_indicators" in ak_data:
                fi = ak_data["financial_indicators"]
                summary_parts.append(f"- PE: {fi.get('pe_ratio', 'N/A')}, PB: {fi.get('pb_ratio', 'N/A')}")
                summary_parts.append(f"- ROE: {fi.get('roe', 'N/A')}%")
            if "realtime_quote" in ak_data:
                rt = ak_data["realtime_quote"]
                summary_parts.append(f"- 当前价: {rt.get('price', 'N/A')}元, 涨跌幅: {rt.get('change_percent', 'N/A')}%")
            if "fund_flow" in ak_data:
                ff = ak_data["fund_flow"]
                summary_parts.append(f"- 资金流向: {ff.get('main_flow_trend', 'N/A')}")
            summary_parts.append("")
        # BochaAI section: top five news items with truncated snippets.
        if "bochaai" in data:
            news = data["bochaai"].get("news", [])
            if news:
                summary_parts.append("### 📰 最新新闻\n")
                for i, n in enumerate(news[:5], 1):
                    summary_parts.append(f"{i}. **{n['title'][:50]}**")
                    if n.get('snippet'):
                        summary_parts.append(f"   {n['snippet'][:100]}...")
                summary_parts.append("")
        # Browser section: top five result titles.
        if "browser" in data:
            results = data["browser"].get("search_results", [])
            if results:
                summary_parts.append("### 🌐 网页搜索结果\n")
                for i, r in enumerate(results[:5], 1):
                    summary_parts.append(f"{i}. {r['title'][:50]}")
                summary_parts.append("")
        # Knowledge-base section: top three historical titles.
        if "kb" in data:
            kb_news = data["kb"].get("historical_news", [])
            if kb_news:
                summary_parts.append("### 📚 历史资料\n")
                for i, n in enumerate(kb_news[:3], 1):
                    summary_parts.append(f"{i}. {n['title'][:50]}")
                summary_parts.append("")
        return "\n".join(summary_parts)

    # ============ legacy API compatibility ============
    async def collect_data(
        self,
        stock_code: str,
        stock_name: str,
        data_requirements: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Collect stock data (backwards-compatible with the v1 API).

        Builds a plan, replaces its tasks with a fixed AkShare-overview +
        knowledge-base pair, and executes it immediately (no confirmation).
        """
        plan = await self.generate_search_plan(
            query="综合数据搜集",
            stock_code=stock_code,
            stock_name=stock_name
        )
        # Override whatever was generated with the two baseline tasks.
        plan.tasks = [
            SearchTask(
                id=f"task_init_1",
                source=SearchSource.AKSHARE,
                query=stock_code,
                description="获取财务和行情数据",
                data_type="overview",
                icon="📊",
                estimated_time=3
            ),
            SearchTask(
                id=f"task_init_2",
                source=SearchSource.KNOWLEDGE_BASE,
                query=stock_code,
                description="获取历史新闻",
                data_type="news",
                icon="📚",
                estimated_time=2
            )
        ]
        return await self.execute_search_plan(plan)
# Quick analyst (unchanged from v1 apart from the provider call style)
class QuickAnalystAgent(Agent):
    """Quick-analysis agent: produces a short, single-shot investment view."""

    def __init__(self, llm_provider=None, organization_id: str = "finnews"):
        super().__init__(
            name="QuickAnalyst",
            role="快速分析师",
            goal="快速综合多角度给出投资建议",
            backstory="""你是一位经验丰富的量化分析师,擅长快速分析和决策。
你能够在短时间内综合考虑多空因素,给出简洁明了的投资建议。
你的分析风格是:快速、准确、实用。""",
            organization_id=organization_id
        )
        if llm_provider is None:
            llm_provider = get_llm_provider()
        # Bypass pydantic field handling on the Agent base class.
        object.__setattr__(self, '_llm_provider', llm_provider)
        logger.info(f"Initialized {self.name} agent")

    async def quick_analyze(
        self,
        stock_code: str,
        stock_name: str,
        context: str
    ) -> Dict[str, Any]:
        """Run one LLM call returning a concise buy/hold/sell view.

        Returns {"success": True, "analysis": ..., "timestamp": ...} or
        {"success": False, "error": ...}.
        """
        # Current wall-clock time is injected into the prompt.
        current_time = datetime.now().strftime("%Y年%m月%d日 %H:%M")
        prompt = f"""请对 {stock_name}({stock_code}) 进行快速投资分析。
【当前时间】
{current_time}
背景资料:
{context}
请在1分钟内给出:
1. 核心观点(一句话)
2. 看多因素(3点)
3. 看空因素(3点)
4. 投资建议(买入/持有/卖出)
5. 目标价位和止损价位
请用简洁的语言,直接给出结论。"""
        try:
            # Synchronous invoke (not awaited) — matches the other v2 agents.
            response = self._llm_provider.invoke([
                {"role": "system", "content": "你是快速分析师,擅长快速给出投资建议。"},
                {"role": "user", "content": prompt}
            ])
            content = response.content if hasattr(response, 'content') else str(response)
            return {
                "success": True,
                "analysis": content,
                "timestamp": datetime.utcnow().isoformat()
            }
        except Exception as e:
            logger.error(f"Quick analysis failed: {e}")
            return {
                "success": False,
                "error": str(e)
            }
# Factory function
def create_data_collector(llm_provider=None) -> DataCollectorAgentV2:
    """Create a DataCollectorAgentV2 (provider defaults inside the agent)."""
    return DataCollectorAgentV2(llm_provider=llm_provider)
================================================
FILE: backend/app/agents/debate_agents.py
================================================
"""
辩论智能体 - Phase 2
实现 Bull vs Bear 多智能体辩论机制
支持动态搜索:智能体可以在发言中请求额外数据
格式: [SEARCH: "查询内容" source:数据源]
"""
import logging
from typing import List, Dict, Any, Optional
from datetime import datetime
from agenticx import Agent
from ..services.llm_service import get_llm_provider
logger = logging.getLogger(__name__)
# 数据请求提示词片段(用于启用动态搜索的场景)
DATA_REQUEST_HINT = """
【数据请求】如果需要更多数据支撑你的论点,可以在发言末尾添加搜索请求:
- [SEARCH: "具体数据需求" source:akshare] -- 财务/行情数据
- [SEARCH: "新闻关键词" source:bochaai] -- 最新新闻
- [SEARCH: "搜索内容"] -- 自动选择最佳数据源
请只在确实需要时使用,每次最多1-2个请求。"""
class BullResearcherAgent(Agent):
    """
    Bull (long-side) researcher agent.

    Builds the optimistic case from news and data, and can request extra
    data mid-debate via [SEARCH: ...] markers.
    """

    def __init__(self, llm_provider=None, organization_id: str = "finnews"):
        # Initialise the pydantic BaseModel parent first.
        super().__init__(
            name="BullResearcher",
            role="看多研究员",
            goal="从积极角度分析股票,发现投资机会和增长潜力",
            backstory="""你是一位乐观但理性的股票研究员,擅长发现被低估的投资机会。
你善于从新闻和数据中提取正面信息,分析公司的增长潜力、竞争优势和市场机遇。
你的分析注重长期价值,但也关注短期催化剂。
当你发现数据不足以支撑论点时,你会主动请求补充数据。""",
            organization_id=organization_id
        )
        # Set after super().__init__() via object.__setattr__ so the pydantic
        # machinery does not clear the attribute.
        if llm_provider is None:
            llm_provider = get_llm_provider()
        object.__setattr__(self, '_llm_provider', llm_provider)
        logger.info(f"Initialized {self.name} agent")

    def analyze(
        self,
        stock_code: str,
        stock_name: str,
        news_list: List[Dict[str, Any]],
        context: str = ""
    ) -> Dict[str, Any]:
        """
        Produce the bull-case analysis report.

        Args:
            stock_code: stock ticker code
            stock_name: display name
            news_list: news dicts (title / sentiment_score are read)
            context: optional extra background text

        Returns:
            Dict with success flag, agent metadata, stance "bull" and the
            LLM-written analysis (or an "error" key on failure).
        """
        news_summary = self._summarize_news(news_list)
        # Current wall-clock time, injected into the prompt.
        current_time = datetime.now().strftime("%Y年%m月%d日 %H:%M")
        prompt = f"""你是一位看多研究员,请从积极角度分析以下股票:
【当前时间】
{current_time}
【股票信息】
代码:{stock_code}
名称:{stock_name}
【相关新闻摘要】
{news_summary}
【分析背景】
{context if context else "无额外背景信息"}
请从以下角度进行看多分析:
## 1. 核心看多逻辑
- 列出3-5个看多的核心理由
- 每个理由需要有数据或新闻支撑
## 2. 增长催化剂
- 短期催化剂(1-3个月内可能发生的利好)
- 中长期催化剂(3-12个月的增长驱动力)
## 3. 估值分析
- 当前估值是否具有吸引力
- 与同行业对比的优势
## 4. 目标预期
- 给出合理的预期收益空间
- 说明达成条件
## 5. 风险提示
- 虽然看多,但也需要指出可能的风险
请确保分析客观、有理有据,避免盲目乐观。
"""
        try:
            response = self._llm_provider.invoke([
                {"role": "system", "content": f"你是{self.role},{self.backstory}"},
                {"role": "user", "content": prompt}
            ])
            analysis_text = response.content if hasattr(response, 'content') else str(response)
            return {
                "success": True,
                "agent_name": self.name,
                "agent_role": self.role,
                "stance": "bull",
                "analysis": analysis_text,
                "timestamp": datetime.utcnow().isoformat()
            }
        except Exception as e:
            logger.error(f"Bull analysis failed: {e}")
            return {
                "success": False,
                "agent_name": self.name,
                "stance": "bull",
                "error": str(e)
            }

    async def debate_round(self, prompt: str, enable_data_request: bool = True) -> str:
        """
        One debate turn (real-time debate mode).

        Args:
            prompt: debate prompt for this round
            enable_data_request: append the [SEARCH: ...] instructions

        Returns:
            The turn's text (may embed data-request markers); on failure a
            bracketed error string rather than raising.
        """
        system_content = f"""你是{self.role},{self.backstory}
你正在参与一场多空辩论,请用专业但有说服力的语气发言。
作为看多方,你的核心任务是:
1. 挖掘公司的增长潜力和投资价值
2. 用数据和事实支撑你的乐观观点
3. 反驳看空方提出的风险点
4. 识别被市场低估的机会"""
        if enable_data_request:
            system_content += DATA_REQUEST_HINT
        try:
            response = self._llm_provider.invoke([
                {"role": "system", "content": system_content},
                {"role": "user", "content": prompt}
            ])
            return response.content if hasattr(response, 'content') else str(response)
        except Exception as e:
            logger.error(f"Bull debate round failed: {e}")
            return f"[发言出错: {e}]"

    def _summarize_news(self, news_list: List[Dict[str, Any]]) -> str:
        """List the first five news titles, tagging each by sentiment_score
        (>0.1 bullish, <-0.1 bearish, otherwise neutral)."""
        if not news_list:
            return "暂无相关新闻"
        summaries = []
        for i, news in enumerate(news_list[:5], 1):
            title = news.get("title", "")
            sentiment = news.get("sentiment_score")
            sentiment_text = ""
            if sentiment is not None:
                if sentiment > 0.1:
                    sentiment_text = "(利好)"
                elif sentiment < -0.1:
                    sentiment_text = "(利空)"
                else:
                    sentiment_text = "(中性)"
            summaries.append(f"{i}. {title} {sentiment_text}")
        return "\n".join(summaries)
class BearResearcherAgent(Agent):
    """
    Bear (short-side) researcher agent.

    Responsibility: mine news and data for risks and potential problems.
    Supports requesting additional data during a debate.
    """
    def __init__(self, llm_provider=None, organization_id: str = "finnews"):
        # Call the parent initializer first (Pydantic BaseModel).
        super().__init__(
            name="BearResearcher",
            role="看空研究员",
            goal="从风险角度分析股票,识别潜在问题和下行风险",
            backstory="""你是一位谨慎的股票研究员,擅长发现被忽视的风险。
你善于从新闻和数据中提取负面信号,分析公司的潜在问题、竞争威胁和市场风险。
你的分析注重风险控制,帮助投资者避免损失。
当你发现数据不足以支撑风险判断时,你会主动请求补充数据。""",
            organization_id=organization_id
        )
        # Set _llm_provider after super().__init__() so Pydantic does not wipe it.
        if llm_provider is None:
            llm_provider = get_llm_provider()
        object.__setattr__(self, '_llm_provider', llm_provider)
        logger.info(f"Initialized {self.name} agent")

    def analyze(
        self,
        stock_code: str,
        stock_name: str,
        news_list: List[Dict[str, Any]],
        context: str = ""
    ) -> Dict[str, Any]:
        """
        Produce the bear-case analysis report for one stock.

        Args:
            stock_code: Stock ticker code.
            stock_name: Human-readable stock name.
            news_list: Related news items (dicts with title / sentiment_score).
            context: Optional extra background text.

        Returns:
            Dict with a success flag, agent identity, stance "bear", and
            either the analysis text plus timestamp or an error message.
        """
        news_summary = self._summarize_news(news_list)
        # Current wall-clock time, injected into the prompt.
        current_time = datetime.now().strftime("%Y年%m月%d日 %H:%M")
        prompt = f"""你是一位看空研究员,请从风险角度分析以下股票:
【当前时间】
{current_time}
【股票信息】
代码:{stock_code}
名称:{stock_name}
【相关新闻摘要】
{news_summary}
【分析背景】
{context if context else "无额外背景信息"}
请从以下角度进行风险分析:
## 1. 核心风险因素
- 列出3-5个主要风险点
- 每个风险需要有数据或新闻支撑
## 2. 负面催化剂
- 短期可能出现的利空事件
- 中长期的结构性风险
## 3. 估值风险
- 当前估值是否过高
- 与同行业对比的劣势
## 4. 下行空间
- 分析可能的下跌幅度
- 触发下跌的条件
## 5. 反驳看多观点
- 针对常见的看多逻辑提出质疑
- 指出乐观预期的不确定性
请确保分析客观、有理有据,避免无根据的悲观。
"""
        try:
            response = self._llm_provider.invoke([
                {"role": "system", "content": f"你是{self.role},{self.backstory}"},
                {"role": "user", "content": prompt}
            ])
            # Providers may return an object with .content or a plain string.
            analysis_text = response.content if hasattr(response, 'content') else str(response)
            return {
                "success": True,
                "agent_name": self.name,
                "agent_role": self.role,
                "stance": "bear",
                "analysis": analysis_text,
                "timestamp": datetime.utcnow().isoformat()
            }
        except Exception as e:
            logger.error(f"Bear analysis failed: {e}")
            return {
                "success": False,
                "agent_name": self.name,
                "stance": "bear",
                "error": str(e)
            }

    def _summarize_news(self, news_list: List[Dict[str, Any]]) -> str:
        """Condense up to five news items into one tagged line each.

        Tag by sentiment_score: >0.1 → (利好), <-0.1 → (利空), else (中性);
        no tag when the score is missing.
        """
        if not news_list:
            return "暂无相关新闻"
        summaries = []
        for i, news in enumerate(news_list[:5], 1):
            title = news.get("title", "")
            sentiment = news.get("sentiment_score")
            sentiment_text = ""
            if sentiment is not None:
                if sentiment > 0.1:
                    sentiment_text = "(利好)"
                elif sentiment < -0.1:
                    sentiment_text = "(利空)"
                else:
                    sentiment_text = "(中性)"
            summaries.append(f"{i}. {title} {sentiment_text}")
        return "\n".join(summaries)

    async def debate_round(self, prompt: str, enable_data_request: bool = True) -> str:
        """
        One speaking turn in a live debate (realtime debate mode).

        Args:
            prompt: The debate prompt for this round.
            enable_data_request: Whether to append the data-request hint so
                the agent may embed search-request markers in its speech.

        Returns:
            The speech text (may contain data-request markers), or a
            bracketed error string if the LLM call fails.
        """
        system_content = f"""你是{self.role},{self.backstory}
你正在参与一场多空辩论,请用专业但有说服力的语气发言。
作为看空方,你的核心任务是:
1. 识别公司的潜在风险和问题
2. 用数据和事实支撑你的谨慎观点
3. 反驳看多方过于乐观的论点
4. 揭示被市场忽视的风险因素"""
        if enable_data_request:
            system_content += DATA_REQUEST_HINT
        try:
            response = self._llm_provider.invoke([
                {"role": "system", "content": system_content},
                {"role": "user", "content": prompt}
            ])
            return response.content if hasattr(response, 'content') else str(response)
        except Exception as e:
            logger.error(f"Bear debate round failed: {e}")
            return f"[发言出错: {e}]"
class InvestmentManagerAgent(Agent):
    """
    Investment manager agent.

    Responsibility: weigh the bull and bear cases against each other and
    issue the final investment decision. May request extra data before
    deciding.
    """
    def __init__(self, llm_provider=None, organization_id: str = "finnews"):
        """Set up identity/persona and attach the LLM provider."""
        # Call the parent initializer first (Pydantic BaseModel).
        super().__init__(
            name="InvestmentManager",
            role="投资经理",
            goal="综合多方观点,做出理性的投资决策",
            backstory="""你是一位经验丰富的投资经理,擅长在多方观点中找到平衡。
你善于综合看多和看空的分析,结合市场环境,做出最优的投资决策。
你的决策注重风险收益比,追求稳健的长期回报。
当你认为辩论双方提供的数据不足以做出决策时,你会主动请求补充关键数据。""",
            organization_id=organization_id
        )
        # Set _llm_provider after super().__init__() so Pydantic does not wipe it.
        if llm_provider is None:
            llm_provider = get_llm_provider()
        object.__setattr__(self, '_llm_provider', llm_provider)
        logger.info(f"Initialized {self.name} agent")

    def make_decision(
        self,
        stock_code: str,
        stock_name: str,
        bull_analysis: str,
        bear_analysis: str,
        context: str = "",
        enable_data_request: bool = False
    ) -> Dict[str, Any]:
        """
        Weigh both sides and produce the final investment decision.

        Args:
            stock_code: Stock ticker code.
            stock_name: Human-readable stock name.
            bull_analysis: The bull researcher's analysis text.
            bear_analysis: The bear researcher's analysis text.
            context: Market background and supplementary data.
            enable_data_request: Allow the model to request extra data via
                [SEARCH: ...] markers appended to the prompt.

        Returns:
            Dict with success flag, decision text, extracted rating and
            timestamp, or an error payload on failure.
        """
        # Current wall-clock time, injected into the prompt.
        current_time = datetime.now().strftime("%Y年%m月%d日 %H:%M")
        prompt = f"""你是一位投资经理,请综合以下看多和看空观点,做出投资决策:
【当前时间】
{current_time}
【股票信息】
代码:{stock_code}
名称:{stock_name}
【看多观点】
{bull_analysis}
【看空观点】
{bear_analysis}
【市场背景及补充数据】
{context if context else "当前市场处于正常波动区间"}
请按以下结构给出最终决策:
## 1. 观点评估
### 看多方论点质量
- 评估看多论点的说服力(1-10分)
- 指出最有力的看多论据
- 指出看多方忽视的问题
### 看空方论点质量
- 评估看空论点的说服力(1-10分)
- 指出最有力的看空论据
- 指出看空方过于悲观的地方
## 2. 数据充分性评估
- 辩论中使用的数据是否充分?
- 是否有关键数据缺失影响决策?
- 已获得的补充数据如何影响判断?
## 3. 综合判断
- 当前股票的核心矛盾是什么
- 短期(1-3个月)和中长期(6-12个月)的观点
## 4. 投资决策
**最终评级**:[强烈推荐 / 推荐 / 中性 / 谨慎 / 回避]
**决策理由**:
(详细说明决策依据)
**建议操作**:
- 对于持仓者:持有/加仓/减仓/清仓
- 对于观望者:买入/观望/规避
**关键监测指标**:
- 列出需要持续关注的信号
- 什么情况下需要调整决策
## 5. 风险收益比
- 预期收益空间
- 潜在下行风险
- 风险收益比评估
请确保决策客观、理性,充分考虑双方观点和已获取的数据。
"""
        if enable_data_request:
            prompt += f"""
【数据请求】如果你认为还需要更多数据才能做出准确决策,可以添加搜索请求:
- [SEARCH: "具体数据需求" source:akshare]
- [SEARCH: "新闻关键词" source:bochaai]
但请优先基于现有数据做出判断。"""
        try:
            response = self._llm_provider.invoke([
                {"role": "system", "content": f"你是{self.role},{self.backstory}"},
                {"role": "user", "content": prompt}
            ])
            decision_text = response.content if hasattr(response, 'content') else str(response)
            # Pull the rating keyword out of the free-form decision text.
            rating = self._extract_rating(decision_text)
            return {
                "success": True,
                "agent_name": self.name,
                "agent_role": self.role,
                "decision": decision_text,
                "rating": rating,
                "timestamp": datetime.utcnow().isoformat()
            }
        except Exception as e:
            logger.error(f"Investment decision failed: {e}")
            return {
                "success": False,
                "agent_name": self.name,
                "error": str(e)
            }

    def _extract_rating(self, text: str) -> str:
        """Return the first rating keyword found in the text; default 中性.

        Checked longest-first so "强烈推荐" is not shadowed by "推荐".
        (Fixed: dropped an unused local `import re` — no regex is used here.)
        """
        ratings = ["强烈推荐", "推荐", "中性", "谨慎", "回避"]
        for rating in ratings:
            if rating in text:
                return rating
        return "中性"
class DebateWorkflow:
    """
    Debate workflow.

    Coordinates Bull/Bear researchers and the InvestmentManager through a
    single analyze → analyze → decide pass, recording an execution trace.
    """
    def __init__(self, llm_provider=None):
        # One agent per role; all share the same LLM provider.
        self.bull_agent = BullResearcherAgent(llm_provider)
        self.bear_agent = BearResearcherAgent(llm_provider)
        self.manager_agent = InvestmentManagerAgent(llm_provider)
        # Execution trace: list of step dicts, see _log_step.
        self.trajectory = []
        logger.info("Initialized DebateWorkflow")

    async def run_debate(
        self,
        stock_code: str,
        stock_name: str,
        news_list: List[Dict[str, Any]],
        context: str = "",
        rounds: int = 1
    ) -> Dict[str, Any]:
        """
        Run the full debate pipeline.

        Args:
            stock_code: Stock ticker code.
            stock_name: Human-readable stock name.
            news_list: Related news items.
            context: Extra context text.
            rounds: Number of debate rounds.
                NOTE(review): `rounds` is never read by this body — confirm
                whether multi-round debate was intended here.

        Returns:
            Debate result dict (both analyses, final decision, trajectory,
            timing), or an error payload on failure.

        NOTE(review): declared async but contains no await — the agent calls
        below are synchronous and will block the event loop; confirm.
        """
        start_time = datetime.utcnow()
        self.trajectory = []
        logger.info(f"🚀 辩论工作流开始: {stock_name}({stock_code}), 新闻数量={len(news_list)}")
        try:
            # Phase 1: independent analyses.
            self._log_step("debate_start", {
                "stock_code": stock_code,
                "stock_name": stock_name,
                "news_count": len(news_list)
            })
            # Bull analysis.
            logger.info("📈 开始看多分析 (BullResearcher)...")
            self._log_step("bull_analysis_start", {"agent": "BullResearcher"})
            bull_result = self.bull_agent.analyze(stock_code, stock_name, news_list, context)
            logger.info(f"📈 看多分析完成: success={bull_result.get('success', False)}")
            self._log_step("bull_analysis_complete", {
                "agent": "BullResearcher",
                "success": bull_result.get("success", False)
            })
            # Bear analysis.
            logger.info("📉 开始看空分析 (BearResearcher)...")
            self._log_step("bear_analysis_start", {"agent": "BearResearcher"})
            bear_result = self.bear_agent.analyze(stock_code, stock_name, news_list, context)
            logger.info(f"📉 看空分析完成: success={bear_result.get('success', False)}")
            self._log_step("bear_analysis_complete", {
                "agent": "BearResearcher",
                "success": bear_result.get("success", False)
            })
            # Phase 2: investment manager decision.
            logger.info("⚖️ 开始投资经理决策 (InvestmentManager)...")
            self._log_step("decision_start", {"agent": "InvestmentManager"})
            decision_result = self.manager_agent.make_decision(
                stock_code=stock_code,
                stock_name=stock_name,
                bull_analysis=bull_result.get("analysis", ""),
                bear_analysis=bear_result.get("analysis", ""),
                context=context
            )
            logger.info(f"⚖️ 投资经理决策完成: rating={decision_result.get('rating', 'unknown')}")
            self._log_step("decision_complete", {
                "agent": "InvestmentManager",
                "rating": decision_result.get("rating", "unknown")
            })
            end_time = datetime.utcnow()
            execution_time = (end_time - start_time).total_seconds()
            logger.info(f"✅ 辩论工作流完成! 耗时={execution_time:.2f}秒, 评级={decision_result.get('rating', 'unknown')}")
            self._log_step("debate_complete", {
                "execution_time": execution_time,
                "final_rating": decision_result.get("rating", "unknown")
            })
            return {
                "success": True,
                "stock_code": stock_code,
                "stock_name": stock_name,
                "bull_analysis": bull_result,
                "bear_analysis": bear_result,
                "final_decision": decision_result,
                "trajectory": self.trajectory,
                "execution_time": execution_time,
                "timestamp": start_time.isoformat()
            }
        except Exception as e:
            logger.error(f"❌ 辩论工作流失败: {e}", exc_info=True)
            self._log_step("debate_failed", {"error": str(e)})
            return {
                "success": False,
                "error": str(e),
                "trajectory": self.trajectory
            }

    def _log_step(self, step_name: str, data: Dict[str, Any]):
        """Append a timestamped step record to the trajectory and log it."""
        step = {
            "step": step_name,
            "timestamp": datetime.utcnow().isoformat(),
            "data": data
        }
        self.trajectory.append(step)
        logger.info(f"Debate step: {step_name} - {data}")
# Factory function
def create_debate_workflow(llm_provider=None) -> DebateWorkflow:
    """Factory: build a DebateWorkflow bound to the given LLM provider."""
    workflow = DebateWorkflow(llm_provider=llm_provider)
    return workflow
================================================
FILE: backend/app/agents/news_analyst.py
================================================
"""
新闻分析师智能体
"""
import logging
from typing import List, Dict, Any, Optional
from agenticx import Agent, Task, BaseTool
from agenticx.core.agent_executor import AgentExecutor
from ..services.llm_service import get_llm_provider
from ..tools import TextCleanerTool
logger = logging.getLogger(__name__)
class NewsAnalystAgent(Agent):
    """
    News analyst agent.

    Responsibility: analyze financial news for sentiment, market impact and
    key information, producing a structured Markdown report plus a
    best-effort structured extraction of its fields.
    """
    def __init__(
        self,
        llm_provider=None,
        tools: Optional[List[BaseTool]] = None,
        organization_id: str = "finnews",
        **kwargs
    ):
        """
        Initialize the news analyst agent.

        Args:
            llm_provider: LLM provider; defaults to the shared provider.
            tools: Tool list; defaults to [TextCleanerTool()].
            organization_id: Tenant id for multi-tenant isolation, default "finnews".
            **kwargs: Extra arguments forwarded to the Agent base class.
        """
        # Fall back to the default LLM provider.
        if llm_provider is None:
            llm_provider = get_llm_provider()
        # Fall back to the default tool set.
        if tools is None:
            tools = [TextCleanerTool()]
        # Stash provider and tools before super().__init__ runs.
        self._llm_provider = llm_provider
        self._tools = tools
        # Declare agent identity (Agent base class).
        super().__init__(
            name="NewsAnalyst",
            role="金融新闻分析师",
            goal="深度分析金融新闻,提取关键信息,评估市场影响",
            backstory="""你是一位经验丰富的金融新闻分析专家,具有10年以上的证券市场分析经验。
你擅长从新闻中提取关键信息,准确判断新闻对股票市场的影响,并能够识别潜在的投资机会和风险。
你的分析报告准确、专业,深受投资者信赖。""",
            organization_id=organization_id,
            **kwargs
        )
        # Create the AgentExecutor after super().__init__().
        self._executor = None
        self._init_executor(llm_provider, tools)
        logger.info(f"Initialized {self.name} agent")

    def _init_executor(self, llm_provider=None, tools=None):
        """Build the AgentExecutor lazily, restoring provider/tools if missing."""
        if self._executor is None:
            if llm_provider is None:
                llm_provider = getattr(self, '_llm_provider', None) or get_llm_provider()
            if tools is None:
                tools = getattr(self, '_tools', None) or [TextCleanerTool()]
            self._llm_provider = llm_provider
            self._tools = tools
            self._executor = AgentExecutor(
                llm_provider=llm_provider,
                tools=tools
            )

    @property
    def executor(self):
        """AgentExecutor accessor (lazy initialization)."""
        if self._executor is None:
            self._init_executor()
        return self._executor

    def analyze_news(
        self,
        news_title: str,
        news_content: str,
        news_url: str = "",
        stock_codes: Optional[List[str]] = None
    ) -> Dict[str, Any]:
        """
        Analyze a single news item with the LLM.

        Args:
            news_title: News headline.
            news_content: News body (truncated to 2000 chars in the prompt).
            news_url: News URL (currently not used in the prompt).
            stock_codes: Related stock codes.

        Returns:
            Dict with a success flag, the raw analysis markdown, and a
            structured extraction of sentiment/score/advice; or an error
            payload on failure.
        """
        # Build the analysis prompt (structured Markdown report template).
        prompt = f"""你是一位经验丰富的金融新闻分析专家,具有10年以上的证券市场分析经验。
你擅长从新闻中提取关键信息,准确判断新闻对股票市场的影响,并能够识别潜在的投资机会和风险。
请深度分析以下金融新闻,并提供结构化的分析报告:
【新闻标题】
{news_title}
【新闻内容】
{news_content[:2000]}
【关联股票】
{', '.join(stock_codes) if stock_codes else '无'}
请按照以下结构进行专业分析,并严格使用 Markdown 格式输出:
## 摘要
结构性分析,长期利好市场生态**
### 正面影响:
- 核心要点1
- 核心要点2
- 核心要点3
### 潜在挑战:
- 挑战点1
- 挑战点2
---
## 1. 情感倾向:[中性偏利好] (评分:X.X)
**情感判断**:[中性偏利好/利好/利空/中性]**
**综合评分**:+X.X (范围:-1 至 +1)**
**理由说明:**
详细说明评分依据,包括:
- 政策影响分析
- 市场短期/长期影响
- 预期收益/风险评估
---
## 2. 关键信息提取
**请使用标准 Markdown 表格格式,确保表格清晰易读:**
| 类别 | 内容 |
|------|------|
| 公司名称 | XXX公司(全称,股票代码:XXXXXX) |
| 事件时间 | 新闻发布时间:YYYY年MM月DD日;关键事件时间线涵盖YYYY年QXXX |
| 股价变动 | 详细描述股价变化趋势和数据 |
| 财务表现(YYYY年QX) | 关键财务指标(使用具体数字和增长率) |
| 驱动因素 | • 因素1<br>• 因素2<br>• 因素3 |
| 分析师观点 | • 机构1(分析师):观点内容<br>• 机构2(分析师):观点内容 |
| 市场情绪指标 | 具体指标和数据 |
**重要说明(表格严格规范)**:
- **禁止跨行**:同一类别下的所有内容必须在**同一行**的单元格内
- **强制换行**:如果同一单元格有多条内容,**必须**使用 `<br>` 分隔,**严禁**使用 Markdown 列表(- 或 1.)或直接换行
- **错误示例**(绝对禁止):
| 驱动因素 | • 因素1 |
| | • 因素2 | <-- 错误!不能另起一行
- **正确示例**:
| 驱动因素 | • 因素1<br>• 因素2 |
- 表头和内容之间用 `|------|------|` 分隔
- 数据要准确,有具体数字时必须标注
---
## 3. 市场影响分析
### 短期影响(1-3个月)
- 影响点1:具体分析
- 影响点2:具体分析
### 中期影响(3-12个月)
- 影响点1:具体分析
- 影响点2:具体分析
### 长期影响(1年以上)
- 影响点1:具体分析
- 影响点2:具体分析
---
## 4. 投资建议
**投资评级**:[推荐买入/谨慎持有/观望/减持]
**建议理由**:
1. 核心逻辑1
2. 核心逻辑2
3. 核心逻辑3
**风险提示**:
- 风险1
- 风险2
---
**格式要求(重要)**:
1. 必须使用标准 Markdown 语法
2. **表格内容严禁跨行**,单元格内换行只能用 `<br>`
3. 标题层级清晰:使用 ##、### 等
4. 列表使用 - 或数字编号(表格外)
5. 加粗使用 **文本**
6. 分隔线使用 ---
7. 评分必须精确到小数点后1位
8. 所有数据必须真实、准确,来源于新闻内容
请确保分析报告专业、准确、结构清晰,特别注意表格格式的规范性,避免表格行错位。
"""
        try:
            # Make sure the LLM provider exists (may have been cleared by Pydantic).
            if not hasattr(self, '_llm_provider') or self._llm_provider is None:
                self._llm_provider = get_llm_provider()
            logger.info(f"Calling LLM provider: {type(self._llm_provider).__name__}, model: {getattr(self._llm_provider, 'model', 'unknown')}")
            # Call the LLM directly (bypasses AgentExecutor to avoid approval pauses).
            response = self._llm_provider.invoke([
                {"role": "system", "content": f"你是{self.role},{self.backstory}"},
                {"role": "user", "content": prompt}
            ])
            logger.info("LLM response received")
            # Providers may return an object with .content or a plain string.
            analysis_text = response.content if hasattr(response, 'content') else str(response)
            # Repair malformed Markdown tables before downstream rendering.
            analysis_text = self._repair_markdown_table(analysis_text)
            # Best-effort extraction of structured fields from the markdown.
            structured_result = self._extract_structured_info(analysis_text)
            return {
                "success": True,
                "analysis_result": analysis_text,
                "structured_data": structured_result,
                "agent_name": self.name,
                "agent_role": self.role,
            }
        except Exception as e:
            logger.error(f"News analysis failed: {e}", exc_info=True)
            return {
                "success": False,
                "error": str(e),
                "agent_name": self.name,
            }

    def _repair_markdown_table(self, text: str) -> str:
        """
        Repair malformed Markdown tables in the LLM output.

        Fixes the common failure where one logical row is split across
        several physical rows whose first column is empty, by merging each
        continuation row back into the previous row with <br> separators.

        (Fixed: dropped an unused local `import re`; previously a merged
        cell that already started with "•" got an extra "• " prepended,
        yielding a doubled bullet.)
        """
        lines = text.split('\n')
        new_lines = []
        in_table = False
        last_table_line_idx = -1
        for line in lines:
            stripped = line.strip()
            # A table row starts and ends with a pipe.
            is_table_row = stripped.startswith('|') and stripped.endswith('|')
            is_separator = '---' in stripped and '|' in stripped
            if is_table_row:
                if not in_table:
                    in_table = True
                # Header/body separator rows pass through untouched.
                if is_separator:
                    new_lines.append(line)
                    last_table_line_idx = len(new_lines) - 1
                    continue
                # Detect a "bad row" whose first cell is empty: | | content |
                parts = [p.strip() for p in stripped.strip('|').split('|')]
                # Merge into the previous table row (only when column counts match).
                if len(parts) >= 2 and not parts[0] and last_table_line_idx >= 0:
                    prev_line = new_lines[last_table_line_idx]
                    prev_parts = [p.strip() for p in prev_line.strip().strip('|').split('|')]
                    if len(parts) == len(prev_parts):
                        # Append each non-empty cell to the matching cell above.
                        for i in range(1, len(parts)):
                            if parts[i]:
                                prev_parts[i] = f"{prev_parts[i]}<br>{parts[i]}"
                        new_prev_line = '| ' + ' | '.join(prev_parts) + ' |'
                        new_lines[last_table_line_idx] = new_prev_line
                        # The bad row has been merged; drop it.
                        continue
            else:
                in_table = False
            new_lines.append(line)
            if in_table:
                last_table_line_idx = len(new_lines) - 1
        return '\n'.join(new_lines)

    def _extract_structured_info(self, analysis_text: str) -> Dict[str, Any]:
        """
        Best-effort extraction of structured fields from the analysis markdown.

        Args:
            analysis_text: The LLM's Markdown report.

        Returns:
            Dict with sentiment label, numeric score, confidence, key
            points, market impact and investment advice; defaults are kept
            for any field that cannot be located.
        """
        import re
        result = {
            "sentiment": "neutral",
            "sentiment_score": 0.0,
            "confidence": 0.5,
            "key_points": [],
            "market_impact": "",
            "investment_advice": "",
        }
        try:
            # Sentiment label: 利好/利空/中性, optionally prefixed 显著/明显.
            sentiment_patterns = [
                r'情感倾向[::]\s*\*?\*?(显著|明显)?(利好|利空|中性)',
                r'(显著|明显)?(利好|利空|中性)',  # fallback: match anywhere
            ]
            for pattern in sentiment_patterns:
                sentiment_match = re.search(pattern, analysis_text)
                if sentiment_match:
                    # Take the last non-empty group (the label itself).
                    groups = [g for g in sentiment_match.groups() if g]
                    if groups:
                        sentiment_word = groups[-1]
                        sentiment_map = {"利好": "positive", "利空": "negative", "中性": "neutral"}
                        result["sentiment"] = sentiment_map.get(sentiment_word, "neutral")
                    break
            # Sentiment score: accepts -0.92, **-0.92**, -0.85 / 1.0, etc.
            score_patterns = [
                r'综合评分[::]\s*\*?\*?([-+]?\d*\.?\d+)',  # 综合评分:-0.92 (highest priority)
                r'评分[::]\s*\*?\*?([-+]?\d*\.?\d+)\s*/\s*[-+]?\d*\.?\d+',  # 评分:-0.85 / 1.0
                r'情感评分[::]\s*\*?\*?([-+]?\d*\.?\d+)',  # 情感评分:-0.92
                r'评分[::]\s*\*?\*?([-+]?\d*\.?\d+)',  # 评分:-0.92
            ]
            for pattern in score_patterns:
                score_match = re.search(pattern, analysis_text)
                if score_match:
                    result["sentiment_score"] = float(score_match.group(1))
                    logger.info(f"Extracted sentiment score: {result['sentiment_score']}")
                    break
            # No numeric score found: infer a mid-strength one from the label.
            if result["sentiment_score"] == 0.0 and result["sentiment"] != "neutral":
                if result["sentiment"] == "positive":
                    result["sentiment_score"] = 0.5  # default: moderately positive
                elif result["sentiment"] == "negative":
                    result["sentiment_score"] = -0.5  # default: moderately negative
            # Confidence.
            confidence_match = re.search(r'置信度[::]\s*\*?\*?(\d*\.?\d+)', analysis_text)
            if confidence_match:
                result["confidence"] = float(confidence_match.group(1))
            # Key points: bullet items inside the 关键信息 section.
            key_points_section = re.search(r'关键信息[::](.*?)(?=市场影响|投资建议|$)', analysis_text, re.DOTALL)
            if key_points_section:
                points_text = key_points_section.group(1)
                points = re.findall(r'[•\-\*]\s*(.+)', points_text)
                result["key_points"] = [p.strip() for p in points if p.strip()]
            # Market impact section.
            impact_match = re.search(r'市场影响[::](.*?)(?=投资建议|置信度|$)', analysis_text, re.DOTALL)
            if impact_match:
                result["market_impact"] = impact_match.group(1).strip()
            # Investment advice section.
            advice_match = re.search(r'投资建议[::](.*?)(?=置信度|$)', analysis_text, re.DOTALL)
            if advice_match:
                result["investment_advice"] = advice_match.group(1).strip()
        except Exception as e:
            logger.warning(f"Failed to extract structured info: {e}")
        # Log what was extracted.
        logger.info(
            f"Extracted sentiment: {result['sentiment']}, "
            f"score: {result['sentiment_score']}, "
            f"confidence: {result['confidence']}"
        )
        return result

    def batch_analyze(
        self,
        news_list: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """
        Analyze a batch of news items sequentially.

        Args:
            news_list: News dicts with title/content/url/stock_codes keys.

        Returns:
            One analysis result dict per input item; failures are recorded
            as {"success": False, ...} entries rather than raised.
        """
        results = []
        for news in news_list:
            try:
                result = self.analyze_news(
                    news_title=news.get("title", ""),
                    news_content=news.get("content", ""),
                    news_url=news.get("url", ""),
                    stock_codes=news.get("stock_codes", [])
                )
                results.append(result)
            except Exception as e:
                logger.error(f"Failed to analyze news: {e}")
                results.append({
                    "success": False,
                    "error": str(e),
                    "news_url": news.get("url", "")
                })
        return results
def create_news_analyst(
    llm_provider=None,
    tools: Optional[List[BaseTool]] = None,
    organization_id: str = "finnews"
) -> NewsAnalystAgent:
    """
    Factory: build a NewsAnalystAgent.

    Args:
        llm_provider: LLM provider; the agent falls back to the default
            provider when None.
        tools: Tool list; the agent falls back to its defaults when None.
        organization_id: Tenant id for multi-tenant isolation, default "finnews".

    Returns:
        A configured NewsAnalystAgent instance.
    """
    agent = NewsAnalystAgent(
        llm_provider=llm_provider,
        tools=tools,
        organization_id=organization_id,
    )
    return agent
================================================
FILE: backend/app/agents/orchestrator.py
================================================
"""
协作编排器
负责管理多智能体协作流程,支持:
- 并行分析模式(parallel)
- 实时辩论模式(realtime_debate)
- 快速分析模式(quick_analysis)
- 动态搜索模式(在辩论过程中按需获取数据)
"""
import logging
import asyncio
from typing import Dict, Any, List, Optional, Callable, AsyncGenerator
from datetime import datetime
from enum import Enum
from ..config import get_mode_config, get_default_mode, DebateModeConfig
from ..services.llm_service import get_llm_provider
logger = logging.getLogger(__name__)
class DebatePhase(Enum):
    """Lifecycle phases of a debate run (values are the wire-format strings)."""
    INITIALIZING = "initializing"        # orchestrator constructed, run() not yet dispatched
    DATA_COLLECTION = "data_collection"  # background data being gathered
    OPENING = "opening"                  # manager opening statement
    DEBATE = "debate"                    # alternating bull/bear rounds
    CLOSING = "closing"                  # final manager decision
    COMPLETED = "completed"              # run finished successfully
    FAILED = "failed"                    # run aborted on error
class DebateEvent:
    """A single event in the debate stream (used for realtime streaming output)."""

    def __init__(
        self,
        event_type: str,
        agent_name: str,
        content: str,
        phase: DebatePhase,
        round_number: Optional[int] = None,
        metadata: Optional[Dict[str, Any]] = None
    ):
        """Capture the event payload and stamp it with the current UTC time."""
        self.event_type = event_type
        self.agent_name = agent_name
        self.content = content
        self.phase = phase
        self.round_number = round_number
        # Never share a mutable default dict between events.
        self.metadata = metadata or {}
        self.timestamp = datetime.utcnow().isoformat()

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict (phase collapsed to its string value)."""
        payload = {
            "event_type": self.event_type,
            "agent_name": self.agent_name,
            "content": self.content,
            "phase": self.phase.value,
            "round_number": self.round_number,
            "metadata": self.metadata,
            "timestamp": self.timestamp,
        }
        return payload
class DebateOrchestrator:
"""辩论编排器"""
def __init__(
    self,
    mode: str = None,
    llm_provider=None,
    enable_dynamic_search: bool = True
):
    """
    Build a debate orchestrator.

    Args:
        mode: Debate mode key (parallel, realtime_debate, quick_analysis);
            falls back to the configured default when None.
        llm_provider: LLM provider; a default one is created when None.
        enable_dynamic_search: Allow on-demand data fetching mid-debate.

    Raises:
        ValueError: If the resolved mode has no configuration.
    """
    self.mode = mode or get_default_mode()
    self.config = get_mode_config(self.mode)
    if not self.config:
        raise ValueError(f"未知的辩论模式: {self.mode}")
    self.llm_provider = llm_provider or get_llm_provider()
    # Debate progress state.
    self.current_phase = DebatePhase.INITIALIZING
    self.current_round = 0
    self.start_time: Optional[datetime] = None
    self.events: List[DebateEvent] = []
    self.is_interrupted = False
    # Dynamic-search configuration; the analyst itself is built lazily.
    self.enable_dynamic_search = enable_dynamic_search
    self._search_analyst = None
    # Search statistics accumulated over the run.
    self.search_stats = {
        "total_requests": 0,
        "successful_searches": 0,
        "data_supplements": []
    }
    # Listeners notified for every emitted event.
    self._event_callbacks: List[Callable[[DebateEvent], None]] = []
    logger.info(f"🎭 初始化辩论编排器,模式: {self.mode}, 动态搜索: {enable_dynamic_search}")
def _get_search_analyst(self):
"""懒加载搜索分析师"""
if self._search_analyst is None and self.enable_dynamic_search:
from .search_analyst import SearchAnalystAgent
self._search_analyst = SearchAnalystAgent(self.llm_provider)
return self._search_analyst
def on_event(self, callback: Callable[[DebateEvent], None]):
    """Register a listener invoked (by _emit_event) for every emitted DebateEvent."""
    self._event_callbacks.append(callback)
def _emit_event(self, event: DebateEvent):
    """Record the event and fan it out to all registered callbacks.

    A callback that raises is logged and skipped, so one bad listener
    cannot break delivery to the others.
    """
    self.events.append(event)
    for cb in self._event_callbacks:
        try:
            cb(event)
        except Exception as e:
            logger.error(f"事件回调出错: {e}")
def interrupt(self, reason: str = "manager_decision"):
    """Flag the debate as interrupted and broadcast an interrupt event.

    The round loop checks is_interrupted at the top of each round; this
    call does not abort a speech that is already in flight.
    """
    self.is_interrupted = True
    event = DebateEvent(
        event_type="interrupt",
        agent_name="InvestmentManager",
        content=f"辩论被打断: {reason}",
        phase=self.current_phase
    )
    self._emit_event(event)
    logger.info(f"⚡ 辩论被打断: {reason}")
async def run(
    self,
    stock_code: str,
    stock_name: str,
    context: str = "",
    news_list: Optional[List[Dict[str, Any]]] = None
) -> Dict[str, Any]:
    """Run the debate flow for the configured mode.

    Dispatches on self.config.flow.type to the parallel / realtime-debate /
    quick-analysis pipeline, brackets the run with start/complete (or error)
    events, and always attaches the serialized event log and wall-clock
    execution time to the result.

    Returns:
        The mode handler's result dict, augmented with "events" and
        "execution_time" ("error" is added on failure).
    """
    self.start_time = datetime.utcnow()
    # Skeleton result; replaced by a mode handler unless dispatch fails early.
    result = {
        "success": False,
        "mode": self.mode,
        "stock_code": stock_code,
        "stock_name": stock_name,
        "trajectory": [],
        "events": []
    }
    try:
        self._emit_event(DebateEvent(
            event_type="start",
            agent_name="Orchestrator",
            content=f"开始 {self.config.name}",
            phase=DebatePhase.INITIALIZING
        ))
        # Pick the execution pipeline for this mode.
        if self.config.flow.type == "parallel_then_summarize":
            result = await self._run_parallel_mode(stock_code, stock_name, context, news_list)
        elif self.config.flow.type == "orchestrated_debate":
            result = await self._run_realtime_debate_mode(stock_code, stock_name, context, news_list)
        elif self.config.flow.type == "single_agent":
            result = await self._run_quick_mode(stock_code, stock_name, context)
        else:
            raise ValueError(f"未知的流程类型: {self.config.flow.type}")
        self.current_phase = DebatePhase.COMPLETED
        self._emit_event(DebateEvent(
            event_type="complete",
            agent_name="Orchestrator",
            content="辩论完成",
            phase=DebatePhase.COMPLETED
        ))
    except Exception as e:
        logger.error(f"辩论执行失败: {e}", exc_info=True)
        self.current_phase = DebatePhase.FAILED
        result["error"] = str(e)
        self._emit_event(DebateEvent(
            event_type="error",
            agent_name="Orchestrator",
            content=f"辩论失败: {e}",
            phase=DebatePhase.FAILED
        ))
    # Always attach the event log and timing, success or failure.
    result["events"] = [e.to_dict() for e in self.events]
    result["execution_time"] = (datetime.utcnow() - self.start_time).total_seconds()
    return result
async def _run_parallel_mode(
    self,
    stock_code: str,
    stock_name: str,
    context: str,
    news_list: List[Dict[str, Any]]
) -> Dict[str, Any]:
    """Run the parallel-analysis mode.

    Bull and Bear analyze concurrently, then the InvestmentManager
    synthesizes a final decision from both analyses.
    """
    from .debate_agents import BullResearcherAgent, BearResearcherAgent, InvestmentManagerAgent
    logger.info("🔄 执行并行分析模式")
    # Build the agent cast; all share the orchestrator's LLM provider.
    bull_agent = BullResearcherAgent(self.llm_provider)
    bear_agent = BearResearcherAgent(self.llm_provider)
    manager_agent = InvestmentManagerAgent(self.llm_provider)
    # Merge the news digest into the shared context.
    news_summary = self._prepare_news_summary(news_list)
    full_context = f"{context}\n\n{news_summary}" if context else news_summary
    self.current_phase = DebatePhase.DEBATE
    # Announce both analyses before kicking them off.
    self._emit_event(DebateEvent(
        event_type="analysis_start",
        agent_name="BullResearcher",
        content="开始看多分析",
        phase=self.current_phase
    ))
    self._emit_event(DebateEvent(
        event_type="analysis_start",
        agent_name="BearResearcher",
        content="开始看空分析",
        phase=self.current_phase
    ))
    # NOTE(review): BullResearcherAgent.analyze as defined in
    # debate_agents.py is synchronous and takes (stock_code, stock_name,
    # news_list, context) — here it is scheduled with create_task (which
    # requires a coroutine) and full_context is passed in the news_list
    # position. Confirm an async overload is actually in use.
    bull_task = asyncio.create_task(
        bull_agent.analyze(stock_code, stock_name, full_context)
    )
    bear_task = asyncio.create_task(
        bear_agent.analyze(stock_code, stock_name, full_context)
    )
    bull_analysis, bear_analysis = await asyncio.gather(bull_task, bear_task)
    self._emit_event(DebateEvent(
        event_type="analysis_complete",
        agent_name="BullResearcher",
        content=bull_analysis.get("analysis", "")[:200] + "...",
        phase=self.current_phase
    ))
    self._emit_event(DebateEvent(
        event_type="analysis_complete",
        agent_name="BearResearcher",
        content=bear_analysis.get("analysis", "")[:200] + "...",
        phase=self.current_phase
    ))
    # The investment manager synthesizes the final decision.
    self.current_phase = DebatePhase.CLOSING
    self._emit_event(DebateEvent(
        event_type="decision_start",
        agent_name="InvestmentManager",
        content="开始综合决策",
        phase=self.current_phase
    ))
    # NOTE(review): make_decision is synchronous in debate_agents.py —
    # confirm awaiting it here is valid.
    final_decision = await manager_agent.make_decision(
        stock_code=stock_code,
        stock_name=stock_name,
        bull_analysis=bull_analysis.get("analysis", ""),
        bear_analysis=bear_analysis.get("analysis", ""),
        context=full_context
    )
    self._emit_event(DebateEvent(
        event_type="decision_complete",
        agent_name="InvestmentManager",
        content=f"决策完成: {final_decision.get('rating', 'N/A')}",
        phase=self.current_phase
    ))
    return {
        "success": True,
        "mode": self.mode,
        "bull_analysis": bull_analysis,
        "bear_analysis": bear_analysis,
        "final_decision": final_decision,
        "trajectory": [
            {"agent": "BullResearcher", "action": "analyze", "status": "completed"},
            {"agent": "BearResearcher", "action": "analyze", "status": "completed"},
            {"agent": "InvestmentManager", "action": "decide", "status": "completed"}
        ]
    }
async def _run_realtime_debate_mode(
    self,
    stock_code: str,
    stock_name: str,
    context: str,
    news_list: List[Dict[str, Any]]
) -> Dict[str, Any]:
    """Run the realtime (turn-by-turn) debate mode, with optional dynamic search.

    Phases: (1) optional data collection, (2) manager opening statement,
    (3) alternating Bull/Bear rounds — each speech may trigger on-demand
    searches, and the manager may interrupt between rounds, (4) final
    manager decision, optionally preceded by a smart data supplement.

    NOTE(review): `news_list` is accepted but never read in this body —
    confirm whether the collected data was meant to replace it.
    """
    from .debate_agents import BullResearcherAgent, BearResearcherAgent, InvestmentManagerAgent
    from .data_collector import DataCollectorAgent
    logger.info("🎭 执行实时辩论模式")
    # Build the agent cast; all share the orchestrator's LLM provider.
    data_collector = DataCollectorAgent(self.llm_provider)
    bull_agent = BullResearcherAgent(self.llm_provider)
    bear_agent = BearResearcherAgent(self.llm_provider)
    manager_agent = InvestmentManagerAgent(self.llm_provider)
    # Lazily-built search analyst (None when dynamic search is disabled).
    search_analyst = self._get_search_analyst()
    rules = self.config.rules
    max_rounds = rules.max_rounds or 5
    max_time = rules.max_time or 600
    trajectory = []
    debate_history = []
    dynamic_data_supplements = []  # Data added on demand during the debate.
    # Phase 1: data collection.
    if rules.require_data_collection:
        self.current_phase = DebatePhase.DATA_COLLECTION
        self._emit_event(DebateEvent(
            event_type="phase_start",
            agent_name="DataCollector",
            content="开始搜集数据",
            phase=self.current_phase
        ))
        collected_data = await data_collector.collect_data(stock_code, stock_name)
        data_summary = collected_data.get("summary", "")
        self._emit_event(DebateEvent(
            event_type="data_collected",
            agent_name="DataCollector",
            content=data_summary[:300] + "...",
            phase=self.current_phase
        ))
        trajectory.append({
            "agent": "DataCollector",
            "action": "collect_data",
            "status": "completed"
        })
        # Fold the collected data into the shared context.
        context = f"{context}\n\n{data_summary}" if context else data_summary
    # Phase 2: manager opening statement.
    self.current_phase = DebatePhase.OPENING
    opening_prompt = f"""你是投资经理,现在要主持一场关于 {stock_name}({stock_code}) 的多空辩论。
请做开场陈述,说明:
1. 今天辩论的股票背景
2. 辩论的规则(最多{max_rounds}轮,每人每轮1分钟)
3. 请看多研究员先发言
背景资料:
{context[:2000]}"""
    self._emit_event(DebateEvent(
        event_type="opening",
        agent_name="InvestmentManager",
        content="投资经理开场中...",
        phase=self.current_phase
    ))
    # NOTE(review): uses llm_provider.chat(...) while agents elsewhere call
    # invoke([...]) — confirm the provider exposes an awaitable chat().
    opening = await self.llm_provider.chat(opening_prompt)
    self._emit_event(DebateEvent(
        event_type="speech",
        agent_name="InvestmentManager",
        content=opening,
        phase=self.current_phase,
        round_number=0
    ))
    trajectory.append({
        "agent": "InvestmentManager",
        "action": "opening",
        "status": "completed",
        "content": opening
    })
    debate_history.append({
        "round": 0,
        "agent": "InvestmentManager",
        "type": "opening",
        "content": opening
    })
    # Phase 3: debate rounds (Bull speaks, then Bear, each round).
    self.current_phase = DebatePhase.DEBATE
    bull_analysis_full = ""
    bear_analysis_full = ""
    for round_num in range(1, max_rounds + 1):
        if self.is_interrupted:
            logger.info(f"辩论在第{round_num}轮被打断")
            break
        # Stop when the wall-clock budget is exhausted.
        elapsed = (datetime.utcnow() - self.start_time).total_seconds()
        if elapsed > max_time:
            logger.info(f"辩论超时,已进行 {elapsed:.0f} 秒")
            break
        self.current_round = round_num
        # Bull speech.
        self._emit_event(DebateEvent(
            event_type="round_start",
            agent_name="BullResearcher",
            content=f"第{round_num}轮 - 看多研究员发言",
            phase=self.current_phase,
            round_number=round_num
        ))
        bull_prompt = self._build_debate_prompt(
            agent_role="看多研究员",
            stock_name=stock_name,
            stock_code=stock_code,
            round_num=round_num,
            max_rounds=max_rounds,
            context=context,
            debate_history=debate_history,
            enable_search_requests=self.enable_dynamic_search
        )
        bull_response = await bull_agent.debate_round(bull_prompt)
        bull_analysis_full += f"\n\n### 第{round_num}轮\n{bull_response}"
        self._emit_event(DebateEvent(
            event_type="speech",
            agent_name="BullResearcher",
            content=bull_response,
            phase=self.current_phase,
            round_number=round_num
        ))
        debate_history.append({
            "round": round_num,
            "agent": "BullResearcher",
            "type": "argument",
            "content": bull_response
        })
        # Dynamic search: honor any data requests in the Bull speech.
        if search_analyst:
            context, supplement = await self._process_speech_for_search(
                search_analyst=search_analyst,
                speech_text=bull_response,
                agent_name="BullResearcher",
                stock_code=stock_code,
                stock_name=stock_name,
                context=context,
                round_num=round_num,
                trajectory=trajectory
            )
            if supplement:
                dynamic_data_supplements.append(supplement)
        # Bear speech.
        self._emit_event(DebateEvent(
            event_type="round_continue",
            agent_name="BearResearcher",
            content=f"第{round_num}轮 - 看空研究员发言",
            phase=self.current_phase,
            round_number=round_num
        ))
        bear_prompt = self._build_debate_prompt(
            agent_role="看空研究员",
            stock_name=stock_name,
            stock_code=stock_code,
            round_num=round_num,
            max_rounds=max_rounds,
            context=context,
            debate_history=debate_history,
            enable_search_requests=self.enable_dynamic_search
        )
        bear_response = await bear_agent.debate_round(bear_prompt)
        bear_analysis_full += f"\n\n### 第{round_num}轮\n{bear_response}"
        self._emit_event(DebateEvent(
            event_type="speech",
            agent_name="BearResearcher",
            content=bear_response,
            phase=self.current_phase,
            round_number=round_num
        ))
        debate_history.append({
            "round": round_num,
            "agent": "BearResearcher",
            "type": "argument",
            "content": bear_response
        })
        # Dynamic search: honor any data requests in the Bear speech.
        if search_analyst:
            context, supplement = await self._process_speech_for_search(
                search_analyst=search_analyst,
                speech_text=bear_response,
                agent_name="BearResearcher",
                stock_code=stock_code,
                stock_name=stock_name,
                context=context,
                round_num=round_num,
                trajectory=trajectory
            )
            if supplement:
                dynamic_data_supplements.append(supplement)
        trajectory.append({
            "agent": "Debate",
            "action": f"round_{round_num}",
            "status": "completed"
        })
        # The manager may interrupt early or request more data between rounds.
        if rules.manager_can_interrupt and round_num < max_rounds:
            should_interrupt, manager_data_request = await self._check_manager_interrupt_or_search(
                manager_agent, debate_history, stock_name, stock_code,
                search_analyst, context
            )
            # Manager-requested data is folded into the shared context.
            if manager_data_request:
                context = f"{context}\n\n【投资经理补充数据】\n{manager_data_request}"
                dynamic_data_supplements.append({
                    "round": round_num,
                    "agent": "InvestmentManager",
                    "data": manager_data_request
                })
            if should_interrupt:
                self.interrupt("投资经理认为已有足够信息做决策")
                break
    # Phase 4: final manager decision.
    self.current_phase = DebatePhase.CLOSING
    self._emit_event(DebateEvent(
        event_type="closing_start",
        agent_name="InvestmentManager",
        content="投资经理正在做最终决策...",
        phase=self.current_phase
    ))
    # With dynamic search on and little supplementation so far, run a
    # smart gap analysis before deciding.
    if search_analyst and len(dynamic_data_supplements) < 2:
        self._emit_event(DebateEvent(
            event_type="smart_supplement",
            agent_name="SearchAnalyst",
            content="智能分析数据缺口,补充关键信息...",
            phase=self.current_phase
        ))
        smart_result = await search_analyst.smart_data_supplement(
            stock_code=stock_code,
            stock_name=stock_name,
            existing_context=context,
            debate_history=debate_history
        )
        if smart_result.get("success") and smart_result.get("combined_summary"):
            context = f"{context}\n\n【智能补充数据】\n{smart_result['combined_summary']}"
            dynamic_data_supplements.append({
                "round": "pre_decision",
                "agent": "SearchAnalyst",
                "data": smart_result["combined_summary"]
            })
    # NOTE(review): make_decision is defined synchronously in
    # debate_agents.py — confirm awaiting it here is valid.
    final_decision = await manager_agent.make_decision(
        stock_code=stock_code,
        stock_name=stock_name,
        bull_analysis=bull_analysis_full,
        bear_analysis=bear_analysis_full,
        context=f"{context}\n\n辩论历史:\n{self._format_debate_history(debate_history)}"
    )
    self._emit_event(DebateEvent(
        event_type="decision",
        agent_name="InvestmentManager",
        # NOTE(review): make_decision returns "decision"/"rating" keys —
        # "summary" looks absent, so this content may always be empty; confirm.
        content=final_decision.get("summary", ""),
        phase=self.current_phase,
        metadata={"rating": final_decision.get("rating")}
    ))
    trajectory.append({
        "agent": "InvestmentManager",
        "action": "final_decision",
        "status": "completed"
    })
    return {
        "success": True,
        "mode": self.mode,
        "bull_analysis": {"analysis": bull_analysis_full, "success": True},
        "bear_analysis": {"analysis": bear_analysis_full, "success": True},
        "final_decision": final_decision,
        "debate_history": debate_history,
        "total_rounds": self.current_round,
        "was_interrupted": self.is_interrupted,
        "trajectory": trajectory,
        "dynamic_search_enabled": self.enable_dynamic_search,
        "data_supplements": dynamic_data_supplements,
        "search_stats": self.search_stats
    }
async def _process_speech_for_search(
    self,
    search_analyst,
    speech_text: str,
    agent_name: str,
    stock_code: str,
    stock_name: str,
    context: str,
    round_num: int,
    trajectory: List[Dict]
) -> tuple:
    """
    处理发言中的搜索请求

    Scans a debate speech for embedded [SEARCH: ...] requests via the
    search analyst and, on success, appends the retrieved data to the
    shared context.

    Args:
        search_analyst: SearchAnalystAgent that parses and executes requests.
        speech_text: The speech to scan for search requests.
        agent_name: Name of the speaker (used in events / trajectory labels).
        stock_code: Stock code under debate.
        stock_name: Stock name under debate.
        context: Current shared background context.
        round_num: Current debate round number.
        trajectory: Mutable trajectory list; a step is appended on success.

    Returns:
        (updated_context, supplement_data). When no search was performed
        or it failed, returns the original context and None.
    """
    try:
        result = await search_analyst.process_debate_speech(
            speech_text=speech_text,
            stock_code=stock_code,
            stock_name=stock_name,
            agent_name=agent_name
        )
        # Count every detected request, even if the search later yields nothing.
        self.search_stats["total_requests"] += result.get("requests_found", 0)
        if not (result.get("success") and result.get("combined_summary")):
            # Fix: explicitly return on the no-supplement path. Previously
            # this path produced no usable return value, breaking the
            # (context, supplement) tuple contract at the call site.
            return context, None
        self.search_stats["successful_searches"] += len(result.get("search_results", []))
        self._emit_event(DebateEvent(
            event_type="dynamic_search",
            agent_name="SearchAnalyst",
            content=f"为 {agent_name} 补充了 {result['requests_found']} 项数据",
            phase=self.current_phase,
            round_number=round_num,
            metadata={"requests": result["requests_found"]}
        ))
        trajectory.append({
            "agent": "SearchAnalyst",
            "action": f"search_for_{agent_name}",
            "status": "completed",
            "requests": result["requests_found"]
        })
        # 更新上下文 — append retrieved data so later rounds can reference it.
        new_context = f"{context}\n\n【{agent_name} 请求的补充数据】\n{result['combined_summary']}"
        supplement = {
            "round": round_num,
            "agent": agent_name,
            "requests": result["requests_found"],
            # Keep only a 500-char excerpt in the supplement record.
            "data": result["combined_summary"][:500]
        }
        return new_context, supplement
    except Exception as e:
        logger.warning(f"处理搜索请求时出错: {e}")
        return context, None
async def _run_quick_mode(
    self,
    stock_code: str,
    stock_name: str,
    context: str
) -> Dict[str, Any]:
    """运行快速分析模式

    Single-pass quick analysis with no debate: delegates to
    QuickAnalystAgent and wraps the result with start/complete events.

    Args:
        stock_code: Stock code to analyze.
        stock_name: Stock name to analyze.
        context: Background material passed to the analyst.

    Returns:
        Dict with "success", "mode", "quick_analysis" and "trajectory".
    """
    from .data_collector import QuickAnalystAgent
    logger.info("🚀 执行快速分析模式")
    quick_analyst = QuickAnalystAgent(self.llm_provider)
    self.current_phase = DebatePhase.DEBATE
    self._emit_event(DebateEvent(
        event_type="quick_analysis_start",
        agent_name="QuickAnalyst",
        content="开始快速分析",
        phase=self.current_phase
    ))
    result = await quick_analyst.quick_analyze(stock_code, stock_name, context)
    # Fix: guard against a missing/None analysis, and only append an
    # ellipsis when the preview was actually truncated.
    analysis_text = result.get("analysis") or ""
    preview = analysis_text[:200] + ("..." if len(analysis_text) > 200 else "")
    self._emit_event(DebateEvent(
        event_type="quick_analysis_complete",
        agent_name="QuickAnalyst",
        content=preview,
        phase=self.current_phase
    ))
    return {
        "success": result.get("success", False),
        "mode": self.mode,
        "quick_analysis": result,
        "trajectory": [
            {"agent": "QuickAnalyst", "action": "analyze", "status": "completed"}
        ]
    }
def _prepare_news_summary(self, news_list: List[Dict[str, Any]]) -> str:
"""准备新闻摘要"""
if not news_list:
return "暂无相关新闻数据"
summary_parts = ["## 相关新闻摘要\n"]
for i, news in enumerate(news_list[:10], 1):
title = news.get("title", "无标题")
content = news.get("content", "")[:200]
source = news.get("source", "未知来源")
date = news.get("published_at", "")
summary_parts.append(f"{i}. **{title}** ({source}, {date})\n {content}...\n")
return "\n".join(summary_parts)
def _build_debate_prompt(
    self,
    agent_role: str,
    stock_name: str,
    stock_code: str,
    round_num: int,
    max_rounds: int,
    context: str,
    debate_history: List[Dict],
    enable_search_requests: bool = False
) -> str:
    """构建辩论提示词

    Assembles the per-round prompt for a debater. Only the last four
    history entries and the first 1500 context characters are included
    to keep the prompt compact.
    """
    # Bound prompt size: keep only the most recent exchanges.
    recent_history = self._format_debate_history(debate_history[-4:])
    base_prompt = f"""你是{agent_role},正在参与关于 {stock_name}({stock_code}) 的多空辩论。
当前是第 {round_num}/{max_rounds} 轮辩论。
背景资料:
{context[:1500]}
最近的辩论历史:
{recent_history}
请发表你的观点(约200字):
1. 如果是第一轮,阐述你的核心论点
2. 如果不是第一轮,先反驳对方观点,再补充新论据
3. 用数据和事实支持你的论点
4. 语气专业但有说服力"""
    if not enable_search_requests:
        return base_prompt
    # Dynamic-search mode: teach the agent the [SEARCH: ...] syntax.
    search_note = """
【数据请求功能】
如果你在分析过程中发现缺少关键数据,可以在发言中使用以下格式请求搜索:
- [SEARCH: "最新的毛利率数据" source:akshare] -- 从AkShare获取财务数据
- [SEARCH: "最近的行业新闻" source:bochaai] -- 从网络搜索新闻
- [SEARCH: "近期资金流向" source:akshare] -- 获取资金流向
- [SEARCH: "竞品对比分析"] -- 不指定来源则自动选择
搜索请求会在你发言后自动执行,数据会补充到下一轮的背景资料中。
请只在确实需要更多数据支撑论点时才使用搜索请求,每次最多1-2个。"""
    return base_prompt + search_note
def _format_debate_history(self, history: List[Dict]) -> str:
"""格式化辩论历史"""
if not history:
return "(尚无辩论历史)"
lines = []
for item in history:
agent = item.get("agent", "Unknown")
content = item.get("content", "")[:300]
round_num = item.get("round", 0)
lines.append(f"[第{round_num}轮 - {agent}]: {content}")
return "\n\n".join(lines)
async def _check_manager_interrupt(
self,
manager_agent,
debate_history: List[Dict],
stock_name: str
) -> bool:
"""检查投资经理是否要打断辩论"""
if len(debate_history) < 4:
return False
check_prompt = f"""你是投资经理,正在主持关于 {stock_name} 的辩论。
目前的辩论历史:
{self._format_debate_history(debate_history[-4:])}
请判断:你是否已经获得足够的信息来做出投资决策?
如果是,回复"是";如果还需要更多辩论,回复"否"。
只回复一个字。"""
try:
response = await self.llm_provider.chat(check_prompt)
return "是" in response[:5]
except Exception:
return False
async def _check_manager_interrupt_or_search(
self,
manager_agent,
debate_history: List[Dict],
stock_name: str,
stock_code: str,
search_analyst,
context: str
) -> tuple:
"""
检查投资经理是否要打断辩论或请求更多数据
Returns:
(should_interrupt: bool, additional_data: str or None)
"""
if len(debate_history) < 4:
return False, None
# 如果没有搜索分析师,使用简单的打断检查
if not search_analyst:
should_interrupt = await self._check_manager_interrupt(
manager_agent, debate_history, stock_name
)
return should_interrupt, None
check_prompt = f"""你是投资经理,正在主持关于 {stock_name}({stock_code}) 的多空辩论。
目前的辩论历史:
{self._format_debate_history(debate_history[-4:])}
请判断当前情况:
1. 如果你已经获得足够的信息做决策,回复:决策就绪
2. 如果你需要更多数据支持,使用以下格式请求:
[SEARCH: "你需要的具体数据" source:数据源]
可用数据源: akshare(财务/行情), bochaai(新闻), browser(网页搜索)
请只回复"决策就绪"或搜索请求,不要添加其他内容。"""
try:
response = await self.llm_provider.chat(check_prompt)
# 检查是否决策就绪
if "决策就绪" in response:
return True, None
# 检查是否有搜索请求
requests = search_analyst.extract_search_requests(response)
if requests:
self._emit_event(DebateEvent(
event_type="manager_search_request",
agent_name="InvestmentManager",
content=f"投资经理请求 {len(requests)} 项补充数据",
phase=self.current_phase,
round_number=self.current_round
))
# 执行搜索
search_result = await search_analyst.process_debate_speech(
speech_text=response,
stock_code=stock_code,
stock_name=stock_name,
agent_name="InvestmentManager"
)
if search_result.get("success") and search_result.get("combined_summary"):
self.search_stats["total_requests"] += len(requests)
self.search_stats["successful_searches"] += len(search_result.get("search_results", []))
return False, search_result["combined_summary"]
return False, None
except Exception as e:
logger.warning(f"检查经理决策时出错: {e}")
return False, None
def create_orchestrator(
    mode: str = None,
    llm_provider=None,
    enable_dynamic_search: bool = True
) -> DebateOrchestrator:
    """
    创建辩论编排器 — factory helper for DebateOrchestrator.

    Args:
        mode: 辩论模式 (parallel, realtime_debate, quick_analysis);
            None lets the orchestrator pick its default.
        llm_provider: LLM 提供者, passed through to every agent.
        enable_dynamic_search: 是否启用动态搜索 during the debate.

    Returns:
        A freshly constructed DebateOrchestrator instance.
    """
    orchestrator = DebateOrchestrator(
        mode=mode,
        llm_provider=llm_provider,
        enable_dynamic_search=enable_dynamic_search,
    )
    return orchestrator
================================================
FILE: backend/app/agents/quantitative_agent.py
================================================
"""
量化分析智能体
负责量化因子挖掘、技术分析和量化策略生成。
集成 Alpha Mining 模块,提供自动化因子发现能力。
功能:
- 因子挖掘:使用 RL 自动发现有效交易因子
- 因子评估:评估因子的预测能力和回测表现
- 技术分析:结合传统技术指标进行分析
- 策略生成:基于因子生成交易策略建议
"""
import logging
import asyncio
from typing import Dict, Any, List, Optional
from datetime import datetime
import json
logger = logging.getLogger(__name__)
class QuantitativeAgent:
"""
量化分析智能体
集成 Alpha Mining 模块,提供因子挖掘和量化分析能力。
Args:
llm_provider: LLM 提供者
enable_alpha_mining: 是否启用因子挖掘
model_path: 预训练模型路径
Example:
agent = QuantitativeAgent(llm_provider)
result = await agent.analyze(stock_code, stock_name, market_data)
"""
def __init__(
    self,
    llm_provider=None,
    enable_alpha_mining: bool = True,
    model_path: Optional[str] = None
):
    """Set up the agent; heavy Alpha Mining parts are loaded lazily."""
    self.llm_provider = llm_provider
    self.enable_alpha_mining = enable_alpha_mining
    self.model_path = model_path
    # Alpha Mining components are created on demand by _init_alpha_mining()
    # so constructing the agent stays cheap when the feature is unused.
    self._alpha_mining_initialized = False
    for lazy_attr in (
        "_generator", "_trainer", "_vm",
        "_evaluator", "_market_builder", "_sentiment_builder",
    ):
        setattr(self, lazy_attr, None)
    # Factors found so far (appended as they are discovered).
    self.discovered_factors: List[Dict[str, Any]] = []
    logger.info(f"QuantitativeAgent initialized (alpha_mining={enable_alpha_mining})")
def _init_alpha_mining(self):
    """延迟初始化 Alpha Mining 组件

    Idempotent lazy initializer: on ImportError the feature is disabled
    instead of raising, so the agent keeps working without the
    alpha_mining package's dependencies installed.
    """
    # Already done -- nothing to do.
    if self._alpha_mining_initialized:
        return
    try:
        from ..alpha_mining import (
            AlphaMiningConfig,
            FactorVocab,
            FactorVM,
            AlphaGenerator,
            AlphaTrainer,
            FactorEvaluator,
            MarketFeatureBuilder,
            SentimentFeatureBuilder
        )
        config = AlphaMiningConfig()
        vocab = FactorVocab()
        # Shared vocab/config wired into every component.
        self._vm = FactorVM(vocab=vocab)
        self._evaluator = FactorEvaluator(config=config)
        self._market_builder = MarketFeatureBuilder(config=config)
        self._sentiment_builder = SentimentFeatureBuilder(config=config)
        # 初始化生成器 (fresh weights by default).
        self._generator = AlphaGenerator(vocab=vocab, config=config)
        # Swap in a pretrained generator when a checkpoint path was given;
        # a load failure falls back to the fresh model.
        if self.model_path:
            try:
                self._generator = AlphaGenerator.load(self.model_path, vocab=vocab)
                logger.info(f"Loaded pretrained model from {self.model_path}")
            except Exception as e:
                logger.warning(f"Failed to load model: {e}")
        self._alpha_mining_initialized = True
        logger.info("Alpha Mining components initialized")
    except ImportError as e:
        logger.warning(f"Alpha Mining not available: {e}")
        self.enable_alpha_mining = False
async def analyze(
    self,
    stock_code: str,
    stock_name: str,
    market_data: Optional[Dict[str, Any]] = None,
    sentiment_data: Optional[Dict[str, Any]] = None,
    context: str = ""
) -> Dict[str, Any]:
    """
    执行量化分析

    Runs up to three stages: factor mining (if enabled), LLM-based
    technical analysis (if market data and an LLM are available), and
    strategy generation (if an LLM is available). Any failure marks the
    result unsuccessful instead of raising.

    Args:
        stock_code: 股票代码
        stock_name: 股票名称
        market_data: 行情数据(可选)
        sentiment_data: 情感数据(可选)
        context: 额外上下文

    Returns:
        分析结果字典 with keys such as "factors_discovered",
        "technical_analysis", "strategy_suggestion" and "confidence".
    """
    result: Dict[str, Any] = {
        "success": True,
        "stock_code": stock_code,
        "stock_name": stock_name,
        "timestamp": datetime.utcnow().isoformat(),
        "analysis_type": "quantitative",
        "factors_discovered": [],
        "technical_analysis": {},
        "strategy_suggestion": "",
        "confidence": 0.0
    }
    try:
        # Stage 1: 因子挖掘(如果启用)
        if self.enable_alpha_mining:
            mined = await self._mine_factors(
                stock_code, stock_name, market_data, sentiment_data
            )
            result["factors_discovered"] = mined.get("factors", [])
            result["factor_mining_stats"] = mined.get("stats", {})
        # Stage 2: 技术分析(使用 LLM) -- needs both an LLM and market data.
        if self.llm_provider and market_data:
            result["technical_analysis"] = await self._technical_analysis(
                stock_code, stock_name, market_data, context
            )
        # Stage 3: 生成策略建议 based on everything gathered so far.
        if self.llm_provider:
            strategy = await self._generate_strategy(
                stock_code, stock_name, result, context
            )
            result["strategy_suggestion"] = strategy.get("suggestion", "")
            result["confidence"] = strategy.get("confidence", 0.0)
    except Exception as e:
        logger.error(f"Quantitative analysis failed: {e}", exc_info=True)
        result["success"] = False
        result["error"] = str(e)
    return result
async def _mine_factors(
self,
stock_code: str,
stock_name: str,
market_data: Optional[Dict[str, Any]],
sentiment_data: Optional[Dict[str, Any]]
) -> Dict[str, Any]:
"""执行因子挖掘"""
self._init_alpha_mining()
if not self._alpha_mining_initialized:
return {"factors": [], "stats": {"error": "Alpha Mining not available"}}
try:
import torch
from ..alpha_mining.utils import generate_mock_data
# 准备特征数据
if market_data is not None:
market_features = self._market_builder.build(market_data)
time_steps = market_features.size(-1)
if sentiment_data is not None:
sentiment_features = self._sentiment_builder.build(
gitextract_w0u594fz/
├── .deepsource.toml
├── .gitignore
├── LICENSE
├── README.md
├── README_zn.md
├── backend/
│ ├── .gitignore
│ ├── README.md
│ ├── README_zn.md
│ ├── add_raw_html_column.py
│ ├── app/
│ │ ├── __init__.py
│ │ ├── agents/
│ │ │ ├── __init__.py
│ │ │ ├── data_collector.py
│ │ │ ├── data_collector_v2.py
│ │ │ ├── debate_agents.py
│ │ │ ├── news_analyst.py
│ │ │ ├── orchestrator.py
│ │ │ ├── quantitative_agent.py
│ │ │ └── search_analyst.py
│ │ ├── alpha_mining/
│ │ │ ├── README.md
│ │ │ ├── __init__.py
│ │ │ ├── backtest/
│ │ │ │ ├── __init__.py
│ │ │ │ └── evaluator.py
│ │ │ ├── config.py
│ │ │ ├── dsl/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── ops.py
│ │ │ │ └── vocab.py
│ │ │ ├── features/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── market.py
│ │ │ │ └── sentiment.py
│ │ │ ├── model/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── alpha_generator.py
│ │ │ │ └── trainer.py
│ │ │ ├── tools/
│ │ │ │ ├── __init__.py
│ │ │ │ └── alpha_mining_tool.py
│ │ │ ├── utils.py
│ │ │ └── vm/
│ │ │ ├── __init__.py
│ │ │ └── factor_vm.py
│ │ ├── api/
│ │ │ ├── __init__.py
│ │ │ └── v1/
│ │ │ ├── __init__.py
│ │ │ ├── agents.py
│ │ │ ├── alpha_mining.py
│ │ │ ├── analysis.py
│ │ │ ├── debug.py
│ │ │ ├── knowledge_graph.py
│ │ │ ├── llm_config.py
│ │ │ ├── news.py
│ │ │ ├── news_v2.py
│ │ │ ├── stocks.py
│ │ │ └── tasks.py
│ │ ├── config/
│ │ │ ├── __init__.py
│ │ │ └── debate_modes.yaml
│ │ ├── core/
│ │ │ ├── __init__.py
│ │ │ ├── celery_app.py
│ │ │ ├── config.py
│ │ │ ├── database.py
│ │ │ ├── neo4j_client.py
│ │ │ └── redis_client.py
│ │ ├── financial/
│ │ │ ├── __init__.py
│ │ │ ├── models/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── news.py
│ │ │ │ └── stock.py
│ │ │ ├── providers/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base.py
│ │ │ │ ├── eastmoney/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── fetchers/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ └── news.py
│ │ │ │ │ └── provider.py
│ │ │ │ ├── nbd/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── fetchers/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ └── news.py
│ │ │ │ │ └── provider.py
│ │ │ │ ├── netease/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── fetchers/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ └── news.py
│ │ │ │ │ └── provider.py
│ │ │ │ ├── sina/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── fetchers/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ └── news.py
│ │ │ │ │ └── provider.py
│ │ │ │ ├── tencent/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── fetchers/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ └── news.py
│ │ │ │ │ └── provider.py
│ │ │ │ └── yicai/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── fetchers/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── news.py
│ │ │ │ └── provider.py
│ │ │ ├── registry.py
│ │ │ └── tools.py
│ │ ├── knowledge/
│ │ │ ├── README.md
│ │ │ ├── __init__.py
│ │ │ ├── graph_models.py
│ │ │ ├── graph_service.py
│ │ │ ├── knowledge_extractor.py
│ │ │ └── parallel_search.py
│ │ ├── main.py
│ │ ├── models/
│ │ │ ├── __init__.py
│ │ │ ├── analysis.py
│ │ │ ├── crawl_task.py
│ │ │ ├── database.py
│ │ │ ├── debate_history.py
│ │ │ ├── news.py
│ │ │ └── stock.py
│ │ ├── scripts/
│ │ │ └── init_stocks.py
│ │ ├── services/
│ │ │ ├── __init__.py
│ │ │ ├── analysis_service.py
│ │ │ ├── embedding_service.py
│ │ │ ├── llm_service.py
│ │ │ └── stock_data_service.py
│ │ ├── storage/
│ │ │ ├── __init__.py
│ │ │ └── vector_storage.py
│ │ ├── tasks/
│ │ │ ├── __init__.py
│ │ │ └── crawl_tasks.py
│ │ └── tools/
│ │ ├── __init__.py
│ │ ├── bochaai_search.py
│ │ ├── caijing_crawler.py
│ │ ├── crawler_base.py
│ │ ├── crawler_enhanced.py
│ │ ├── dynamic_crawler_example.py
│ │ ├── eastmoney_crawler.py
│ │ ├── eeo_crawler.py
│ │ ├── interactive_crawler.py
│ │ ├── jingji21_crawler.py
│ │ ├── jwview_crawler.py
│ │ ├── nbd_crawler.py
│ │ ├── netease163_crawler.py
│ │ ├── search_engine_crawler.py
│ │ ├── sina_crawler.py
│ │ ├── tencent_crawler.py
│ │ ├── text_cleaner.py
│ │ └── yicai_crawler.py
│ ├── clear_news_data.py
│ ├── env.example
│ ├── init_db.py
│ ├── init_knowledge_graph.py
│ ├── requirements.txt
│ ├── reset_database.py
│ ├── setup_env.sh
│ ├── start.sh
│ ├── start_celery.sh
│ └── tests/
│ ├── __init__.py
│ ├── check_milvus_data.py
│ ├── check_news_embedding_status.py
│ ├── financial/
│ │ ├── __init__.py
│ │ ├── test_smoke_openbb_models.py
│ │ ├── test_smoke_openbb_provider.py
│ │ └── test_smoke_openbb_tools.py
│ ├── manual_vectorize.py
│ ├── test_alpha_mining/
│ │ ├── __init__.py
│ │ ├── test_integration_p2.py
│ │ ├── test_smoke_p0.py
│ │ └── test_smoke_p1.py
│ └── test_smoke_alpha_mining.py
├── deploy/
│ ├── Dockerfile.celery
│ ├── celery-entrypoint.sh
│ └── docker-compose.dev.yml
├── docs/
│ ├── BochaAI_Web_Search_API_20251222_121535.md
│ └── 天眼查MCP服务_20260104_171528.md
├── frontend/
│ ├── .gitignore
│ ├── QUICKSTART.md
│ ├── README.md
│ ├── index.html
│ ├── package.json
│ ├── postcss.config.js
│ ├── src/
│ │ ├── App.tsx
│ │ ├── components/
│ │ │ ├── DebateChatRoom.tsx
│ │ │ ├── DebateConfig.tsx
│ │ │ ├── DebateHistorySidebar.tsx
│ │ │ ├── HighlightText.tsx
│ │ │ ├── KLineChart.tsx
│ │ │ ├── MentionInput.tsx
│ │ │ ├── ModelSelector.tsx
│ │ │ ├── NewsDetailDrawer.tsx
│ │ │ ├── StockSearch.tsx
│ │ │ ├── alpha-mining/
│ │ │ │ ├── AgentDemo.tsx
│ │ │ │ ├── MetricsDashboard.tsx
│ │ │ │ ├── OperatorGrid.tsx
│ │ │ │ ├── SentimentCompare.tsx
│ │ │ │ ├── TrainingMonitor.tsx
│ │ │ │ └── index.ts
│ │ │ └── ui/
│ │ │ ├── badge.tsx
│ │ │ ├── button.tsx
│ │ │ ├── card.tsx
│ │ │ ├── dropdown-menu.tsx
│ │ │ ├── sheet.tsx
│ │ │ └── tabs.tsx
│ │ ├── context/
│ │ │ └── NewsToolbarContext.tsx
│ │ ├── hooks/
│ │ │ └── useDebounce.ts
│ │ ├── index.css
│ │ ├── layout/
│ │ │ └── MainLayout.tsx
│ │ ├── lib/
│ │ │ ├── api-client.ts
│ │ │ └── utils.ts
│ │ ├── main.tsx
│ │ ├── pages/
│ │ │ ├── AgentMonitorPage.tsx
│ │ │ ├── AlphaMiningPage.tsx
│ │ │ ├── Dashboard.tsx
│ │ │ ├── NewsListPage.tsx
│ │ │ ├── StockAnalysisPage.tsx
│ │ │ ├── StockSearchPage.tsx
│ │ │ └── TaskManagerPage.tsx
│ │ ├── store/
│ │ │ ├── useDebateStore.ts
│ │ │ ├── useLanguageStore.ts
│ │ │ ├── useNewsStore.ts
│ │ │ └── useTaskStore.ts
│ │ └── types/
│ │ └── api.ts
│ ├── tailwind.config.js
│ ├── tsconfig.json
│ ├── tsconfig.node.json
│ └── vite.config.ts
├── legacy_v1/
│ ├── .deepsource.toml
│ ├── Chinese_Stop_Words.txt
│ ├── Crawler/
│ │ ├── __init__.py
│ │ ├── crawler_cnstock.py
│ │ ├── crawler_jrj.py
│ │ ├── crawler_nbd.py
│ │ ├── crawler_sina.py
│ │ ├── crawler_stcn.py
│ │ └── crawler_tushare.py
│ ├── README_OLD.md
│ ├── Text_Analysis/
│ │ ├── __init__.py
│ │ ├── text_mining.py
│ │ └── text_processing.py
│ ├── finance_dict.txt
│ ├── run_crawler_cnstock.py
│ ├── run_crawler_jrj.py
│ ├── run_crawler_nbd.py
│ ├── run_crawler_sina.py
│ ├── run_crawler_stcn.py
│ ├── run_crawler_tushare.py
│ ├── run_main.py
│ └── src/
│ ├── Gon/
│ │ ├── __init__.py
│ │ ├── cnstockspyder.py
│ │ ├── history_starter_cnstock.py
│ │ ├── history_starter_jrj.py
│ │ ├── history_starter_nbd.py
│ │ ├── history_starter_stock_price.py
│ │ ├── ifengspyder.py
│ │ ├── jrjspyder.py
│ │ ├── kill_realtime_spyder_tasks.py
│ │ ├── money163spyder.py
│ │ ├── nbdspyder.py
│ │ ├── realtime_starter_cnstock.py
│ │ ├── realtime_starter_jrj.py
│ │ ├── realtime_starter_nbd.py
│ │ ├── realtime_starter_redis_queue.py
│ │ ├── realtime_starter_stock_price.py
│ │ ├── sinaspyder.py
│ │ ├── spyder.py
│ │ └── stockinfospyder.py
│ ├── Hisoka/
│ │ └── classifier.py
│ ├── Killua/
│ │ ├── __init__.py
│ │ ├── buildstocknewsdb.py
│ │ ├── deduplication.py
│ │ └── denull.py
│ ├── Kite/
│ │ ├── __init__.py
│ │ ├── config.py
│ │ ├── database.py
│ │ ├── log.py
│ │ ├── utils.py
│ │ └── webserver.py
│ ├── Leorio/
│ │ ├── __init__.py
│ │ ├── chnstopwords.txt
│ │ ├── financedict.txt
│ │ ├── tokenization.py
│ │ └── topicmodelling.py
│ ├── __init__.py
│ ├── history_spyder_startup.bat
│ ├── main.py
│ ├── realtime_spyder_startup.bat
│ └── realtime_spyder_stopall.bat
├── reset_all_data.sh
└── thirdparty/
├── DISC-FinLLM.md
├── ElegantRL.md
├── FinCast-fts.md
├── FinGPT.md
├── FinGenius.md
├── FinRL-Meta.md
├── FinRL.md
├── FinRobot.md
├── FinceptTerminal.md
├── Kronos.md
├── Lean.md
├── README.md
├── TradingAgents-CN.md
├── TradingAgents.md
├── TrendRadar.md
├── agentic-trading.md
├── awesome-quant.md
├── backtrader.md
├── investor-agent.md
├── panda_quantflow.md
├── qlib.md
└── vnpy.md
SYMBOL INDEX (1422 symbols across 161 files)
FILE: backend/add_raw_html_column.py
function add_raw_html_column (line 23) | def add_raw_html_column():
FILE: backend/app/agents/data_collector.py
class DataCollectorAgent (line 19) | class DataCollectorAgent(Agent):
method __init__ (line 22) | def __init__(self, llm_provider=None, organization_id: str = "finnews"):
method collect_data (line 42) | async def collect_data(
method _collect_news (line 103) | async def _collect_news(self, stock_code: str, stock_name: str) -> Lis...
method _collect_financial (line 124) | async def _collect_financial(self, stock_code: str) -> Dict[str, Any]:
method _collect_fund_flow (line 134) | async def _collect_fund_flow(self, stock_code: str) -> Dict[str, Any]:
method _collect_realtime_quote (line 144) | async def _collect_realtime_quote(self, stock_code: str) -> Dict[str, ...
method _generate_summary (line 154) | async def _generate_summary(self, data: Dict[str, Any]) -> str:
method analyze_data_quality (line 205) | async def analyze_data_quality(self, data: Dict[str, Any]) -> Dict[str...
class QuickAnalystAgent (line 257) | class QuickAnalystAgent(Agent):
method __init__ (line 260) | def __init__(self, llm_provider=None, organization_id: str = "finnews"):
method quick_analyze (line 275) | async def quick_analyze(
FILE: backend/app/agents/data_collector_v2.py
class SearchSource (line 31) | class SearchSource(str, Enum):
class SearchTask (line 40) | class SearchTask(BaseModel):
class SearchPlan (line 51) | class SearchPlan(BaseModel):
class SearchResult (line 63) | class SearchResult(BaseModel):
class DataCollectorAgentV2 (line 74) | class DataCollectorAgentV2(Agent):
method __init__ (line 126) | def __init__(self, llm_provider=None, organization_id: str = "finnews"):
method generate_search_plan (line 155) | async def generate_search_plan(
method _create_task (line 238) | def _create_task(
method _analyze_with_llm (line 280) | async def _analyze_with_llm(
method _get_icon_for_source (line 347) | def _get_icon_for_source(self, source: SearchSource) -> str:
method execute_search_plan (line 358) | async def execute_search_plan(
method _execute_task (line 432) | async def _execute_task(
method _search_akshare (line 481) | async def _search_akshare(
method _search_bochaai (line 525) | async def _search_bochaai(
method _search_browser (line 561) | async def _search_browser(self, query: str) -> Optional[Dict[str, Any]]:
method _search_knowledge_base (line 593) | async def _search_knowledge_base(
method _generate_combined_summary (line 622) | async def _generate_combined_summary(
method collect_data (line 685) | async def collect_data(
class QuickAnalystAgent (line 727) | class QuickAnalystAgent(Agent):
method __init__ (line 730) | def __init__(self, llm_provider=None, organization_id: str = "finnews"):
method quick_analyze (line 745) | async def quick_analyze(
function create_data_collector (line 791) | def create_data_collector(llm_provider=None) -> DataCollectorAgentV2:
FILE: backend/app/agents/debate_agents.py
class BullResearcherAgent (line 26) | class BullResearcherAgent(Agent):
method __init__ (line 33) | def __init__(self, llm_provider=None, organization_id: str = "finnews"):
method analyze (line 53) | def analyze(
method debate_round (line 133) | async def debate_round(self, prompt: str, enable_data_request: bool = ...
method _summarize_news (line 166) | def _summarize_news(self, news_list: List[Dict[str, Any]]) -> str:
class BearResearcherAgent (line 188) | class BearResearcherAgent(Agent):
method __init__ (line 195) | def __init__(self, llm_provider=None, organization_id: str = "finnews"):
method analyze (line 215) | def analyze(
method _summarize_news (line 296) | def _summarize_news(self, news_list: List[Dict[str, Any]]) -> str:
method debate_round (line 317) | async def debate_round(self, prompt: str, enable_data_request: bool = ...
class InvestmentManagerAgent (line 351) | class InvestmentManagerAgent(Agent):
method __init__ (line 358) | def __init__(self, llm_provider=None, organization_id: str = "finnews"):
method make_decision (line 378) | def make_decision(
method _extract_rating (line 501) | def _extract_rating(self, text: str) -> str:
class DebateWorkflow (line 512) | class DebateWorkflow:
method __init__ (line 518) | def __init__(self, llm_provider=None):
method run_debate (line 528) | async def run_debate(
method _log_step (line 629) | def _log_step(self, step_name: str, data: Dict[str, Any]):
function create_debate_workflow (line 641) | def create_debate_workflow(llm_provider=None) -> DebateWorkflow:
FILE: backend/app/agents/news_analyst.py
class NewsAnalystAgent (line 15) | class NewsAnalystAgent(Agent):
method __init__ (line 21) | def __init__(
method _init_executor (line 67) | def _init_executor(self, llm_provider=None, tools=None):
method executor (line 83) | def executor(self):
method analyze_news (line 89) | def analyze_news(
method _repair_markdown_table (line 264) | def _repair_markdown_table(self, text: str) -> str:
method _extract_structured_info (line 325) | def _extract_structured_info(self, analysis_text: str) -> Dict[str, Any]:
method batch_analyze (line 420) | def batch_analyze(
function create_news_analyst (line 455) | def create_news_analyst(
FILE: backend/app/agents/orchestrator.py
class DebatePhase (line 22) | class DebatePhase(Enum):
class DebateEvent (line 33) | class DebateEvent:
method __init__ (line 35) | def __init__(
method to_dict (line 52) | def to_dict(self) -> Dict[str, Any]:
class DebateOrchestrator (line 64) | class DebateOrchestrator:
method __init__ (line 67) | def __init__(
method _get_search_analyst (line 109) | def _get_search_analyst(self):
method on_event (line 116) | def on_event(self, callback: Callable[[DebateEvent], None]):
method _emit_event (line 120) | def _emit_event(self, event: DebateEvent):
method interrupt (line 129) | def interrupt(self, reason: str = "manager_decision"):
method run (line 140) | async def run(
method _run_parallel_mode (line 200) | async def _run_parallel_mode(
method _run_realtime_debate_mode (line 296) | async def _run_realtime_debate_mode(
method _process_speech_for_search (line 618) | async def _process_speech_for_search(
method _run_quick_mode (line 681) | async def _run_quick_mode(
method _prepare_news_summary (line 720) | def _prepare_news_summary(self, news_list: List[Dict[str, Any]]) -> str:
method _build_debate_prompt (line 736) | def _build_debate_prompt(
method _format_debate_history (line 783) | def _format_debate_history(self, history: List[Dict]) -> str:
method _check_manager_interrupt (line 797) | async def _check_manager_interrupt(
method _check_manager_interrupt_or_search (line 822) | async def _check_manager_interrupt_or_search(
function create_orchestrator (line 899) | def create_orchestrator(
FILE: backend/app/agents/quantitative_agent.py
class QuantitativeAgent (line 23) | class QuantitativeAgent:
method __init__ (line 39) | def __init__(
method _init_alpha_mining (line 63) | def _init_alpha_mining(self):
method analyze (line 106) | async def analyze(
method _mine_factors (line 170) | async def _mine_factors(
method _technical_analysis (line 256) | async def _technical_analysis(
method _generate_strategy (line 309) | async def _generate_strategy(
method _summarize_market_data (line 372) | def _summarize_market_data(self, market_data: Dict[str, Any]) -> str:
method evaluate_factor (line 389) | async def evaluate_factor(
method get_best_factors (line 444) | def get_best_factors(self, top_k: int = 5) -> List[Dict[str, Any]]:
function create_quantitative_agent (line 454) | def create_quantitative_agent(
FILE: backend/app/agents/search_analyst.py
class SearchSource (line 26) | class SearchSource(Enum):
class SearchAnalystAgent (line 35) | class SearchAnalystAgent(Agent):
method __init__ (line 50) | def __init__(self, llm_provider=None, organization_id: str = "finnews"):
method extract_search_requests (line 79) | def extract_search_requests(self, text: str) -> List[Dict[str, Any]]:
method search (line 115) | async def search(
method _search_akshare (line 181) | async def _search_akshare(
method _search_bochaai (line 237) | async def _search_bochaai(
method _search_browser (line 278) | async def _search_browser(self, query: str) -> Optional[Dict[str, Any]]:
method _search_knowledge_base (line 310) | async def _search_knowledge_base(
method _generate_summary (line 346) | async def _generate_summary(self, query: str, data: Dict[str, Any]) ->...
method process_debate_speech (line 403) | async def process_debate_speech(
method smart_data_supplement (line 476) | async def smart_data_supplement(
method _format_debate_history (line 542) | def _format_debate_history(self, history: List[Dict[str, Any]]) -> str:
function create_search_analyst (line 557) | def create_search_analyst(llm_provider=None) -> SearchAnalystAgent:
FILE: backend/app/alpha_mining/backtest/evaluator.py
class FactorEvaluator (line 25) | class FactorEvaluator:
method __init__ (line 41) | def __init__(
method evaluate (line 59) | def evaluate(
method _evaluate_single (line 101) | def _evaluate_single(
method _factor_to_signal (line 145) | def _factor_to_signal(self, factor: np.ndarray) -> np.ndarray:
method _signal_to_position (line 157) | def _signal_to_position(self, signal: np.ndarray) -> np.ndarray:
method _calc_sortino (line 169) | def _calc_sortino(self, returns: np.ndarray) -> float:
method _calc_sharpe (line 186) | def _calc_sharpe(self, returns: np.ndarray) -> float:
method _calc_ic (line 194) | def _calc_ic(self, factor: np.ndarray, returns: np.ndarray) -> float:
method _calc_rank_ic (line 209) | def _calc_rank_ic(self, factor: np.ndarray, returns: np.ndarray) -> fl...
method _calc_max_drawdown (line 226) | def _calc_max_drawdown(self, returns: np.ndarray) -> float:
method get_reward (line 235) | def get_reward(
method compare_factors (line 267) | def compare_factors(
method rank_factors (line 293) | def rank_factors(
FILE: backend/app/alpha_mining/config.py
class AlphaMiningConfig (line 16) | class AlphaMiningConfig:
method torch_device (line 65) | def torch_device(self) -> torch.device:
method all_features (line 70) | def all_features(self) -> List[str]:
method num_features (line 78) | def num_features(self) -> int:
FILE: backend/app/alpha_mining/dsl/ops.py
function ts_delay (line 22) | def ts_delay(x: torch.Tensor, d: int = 1) -> torch.Tensor:
function ts_delta (line 43) | def ts_delta(x: torch.Tensor, d: int = 1) -> torch.Tensor:
function ts_mean (line 57) | def ts_mean(x: torch.Tensor, window: int = 5) -> torch.Tensor:
function ts_std (line 83) | def ts_std(x: torch.Tensor, window: int = 5) -> torch.Tensor:
function _op_gate (line 108) | def _op_gate(condition: torch.Tensor, x: torch.Tensor, y: torch.Tensor) ...
function _op_jump (line 118) | def _op_jump(x: torch.Tensor) -> torch.Tensor:
function _op_decay (line 130) | def _op_decay(x: torch.Tensor) -> torch.Tensor:
function _op_max3 (line 139) | def _op_max3(x: torch.Tensor) -> torch.Tensor:
function get_op_names (line 189) | def get_op_names() -> List[str]:
function get_op_by_name (line 194) | def get_op_by_name(name: str) -> Tuple[Callable, int]:
function get_num_ops (line 213) | def get_num_ops() -> int:
FILE: backend/app/alpha_mining/dsl/vocab.py
class FactorVocab (line 32) | class FactorVocab:
method __post_init__ (line 48) | def __post_init__(self):
method vocab_size (line 62) | def vocab_size(self) -> int:
method num_features (line 67) | def num_features(self) -> int:
method num_ops (line 72) | def num_ops(self) -> int:
method feature_offset (line 77) | def feature_offset(self) -> int:
method token_to_name (line 81) | def token_to_name(self, token: int) -> str:
method name_to_token (line 98) | def name_to_token(self, name: str) -> int:
method is_feature (line 115) | def is_feature(self, token: int) -> bool:
method is_operator (line 119) | def is_operator(self, token: int) -> bool:
method get_operator_arity (line 123) | def get_operator_arity(self, token: int) -> int:
method get_operator_func (line 142) | def get_operator_func(self, token: int):
method get_all_tokens (line 161) | def get_all_tokens(self) -> List[int]:
method get_feature_tokens (line 165) | def get_feature_tokens(self) -> List[int]:
method get_operator_tokens (line 169) | def get_operator_tokens(self) -> List[int]:
method __repr__ (line 173) | def __repr__(self) -> str:
FILE: backend/app/alpha_mining/features/market.py
class MarketFeatureBuilder (line 24) | class MarketFeatureBuilder:
method __init__ (line 43) | def __init__(
method build (line 58) | def build(
method _build_from_dataframe (line 84) | def _build_from_dataframe(
method _build_from_tensors (line 142) | def _build_from_tensors(
method _calc_returns (line 194) | def _calc_returns(self, close: torch.Tensor) -> torch.Tensor:
method _calc_volatility (line 208) | def _calc_volatility(self, returns: torch.Tensor, window: int) -> torc...
method _calc_pct_change (line 224) | def _calc_pct_change(self, x: torch.Tensor) -> torch.Tensor:
method _robust_normalize (line 237) | def _robust_normalize(self, features: torch.Tensor) -> torch.Tensor:
method get_feature_names (line 261) | def get_feature_names(self) -> List[str]:
method build_batch (line 265) | def build_batch(
FILE: backend/app/alpha_mining/features/sentiment.py
class SentimentFeatureBuilder (line 27) | class SentimentFeatureBuilder:
method __init__ (line 46) | def __init__(
method build (line 61) | def build(
method _parse_dataframe (line 116) | def _parse_dataframe(self, df: pd.DataFrame):
method _parse_dict (line 140) | def _parse_dict(self, data: Dict[str, Any]):
method _parse_list (line 150) | def _parse_list(self, data: List[Dict]):
method _align_time_steps (line 163) | def _align_time_steps(self, x: torch.Tensor, target_len: int) -> torch...
method _apply_decay (line 177) | def _apply_decay(self, sentiment: torch.Tensor) -> torch.Tensor:
method _normalize (line 194) | def _normalize(self, features: torch.Tensor) -> torch.Tensor:
method get_feature_names (line 209) | def get_feature_names(self) -> List[str]:
method build_from_finnews (line 213) | def build_from_finnews(
method _generate_mock_sentiment (line 258) | def _generate_mock_sentiment(
method combine_with_market (line 279) | def combine_with_market(
FILE: backend/app/alpha_mining/model/alpha_generator.py
class AlphaGenerator (line 28) | class AlphaGenerator(nn.Module):
method __init__ (line 44) | def __init__(
method _init_weights (line 93) | def _init_weights(self):
method forward (line 104) | def forward(
method generate (line 143) | def generate(
method generate_with_training (line 203) | def generate_with_training(
method save (line 255) | def save(self, path: str):
method load (line 266) | def load(cls, path: str, vocab: Optional[FactorVocab] = None) -> 'Alph...
FILE: backend/app/alpha_mining/model/trainer.py
class AlphaTrainer (line 31) | class AlphaTrainer:
method __init__ (line 44) | def __init__(
method _default_evaluator (line 78) | def _default_evaluator(self, factor: torch.Tensor, returns: torch.Tens...
method train_step (line 106) | def train_step(
method train (line 202) | def train(
method _save_checkpoint (line 274) | def _save_checkpoint(self):
method get_best_formula (line 294) | def get_best_formula(self) -> Optional[str]:
method get_training_history (line 298) | def get_training_history(self) -> List[Dict[str, Any]]:
FILE: backend/app/alpha_mining/tools/alpha_mining_tool.py
class AlphaMiningTool (line 43) | class AlphaMiningTool(BaseTool[Dict[str, Any]]):
method __init__ (line 64) | def __init__(
method _setup_parameters (line 115) | def _setup_parameters(self) -> None:
method execute (line 179) | def execute(self, parameters: Dict[str, Any], context: ToolContext) ->...
method aexecute (line 221) | async def aexecute(self, parameters: Dict[str, Any], context: ToolCont...
method _action_mine (line 226) | def _action_mine(self, params: Dict[str, Any], context: ToolContext) -...
method _action_evaluate (line 277) | def _action_evaluate(self, params: Dict[str, Any], context: ToolContex...
method _action_generate (line 316) | def _action_generate(self, params: Dict[str, Any], context: ToolContex...
method _action_list (line 354) | def _action_list(self, params: Dict[str, Any], context: ToolContext) -...
method _prepare_features (line 367) | def _prepare_features(
method _parse_formula (line 405) | def _parse_formula(self, formula_str: str) -> Optional[List[int]]:
FILE: backend/app/alpha_mining/utils.py
function generate_mock_data (line 17) | def generate_mock_data(
function _rolling_std (line 99) | def _rolling_std(x: torch.Tensor, window: int = 20) -> torch.Tensor:
function _pct_change (line 123) | def _pct_change(x: torch.Tensor) -> torch.Tensor:
function _robust_normalize (line 140) | def _robust_normalize(x: torch.Tensor) -> torch.Tensor:
function set_random_seed (line 165) | def set_random_seed(seed: int):
function get_device (line 173) | def get_device() -> torch.device:
FILE: backend/app/alpha_mining/vm/factor_vm.py
class FactorVM (line 25) | class FactorVM:
method __init__ (line 38) | def __init__(self, vocab: Optional[FactorVocab] = None):
method execute (line 47) | def execute(
method decode (line 126) | def decode(self, formula: List[int]) -> str:
method validate (line 184) | def validate(self, formula: List[int]) -> bool:
method get_required_features (line 218) | def get_required_features(self, formula: List[int]) -> List[int]:
FILE: backend/app/api/v1/agents.py
function get_prompts (line 34) | def get_prompts(language: str = "zh") -> Dict[str, str]:
class DebateRequest (line 209) | class DebateRequest(BaseModel):
class DebateResponse (line 220) | class DebateResponse(BaseModel):
class AgentLogEntry (line 237) | class AgentLogEntry(BaseModel):
class AgentMetrics (line 249) | class AgentMetrics(BaseModel):
class TrajectoryStep (line 259) | class TrajectoryStep(BaseModel):
class SearchPlanRequest (line 271) | class SearchPlanRequest(BaseModel):
class SearchExecuteRequest (line 278) | class SearchExecuteRequest(BaseModel):
function run_stock_debate (line 286) | async def run_stock_debate(
function generate_debate_stream (line 477) | async def generate_debate_stream(
function run_stock_debate_stream (line 848) | async def run_stock_debate_stream(
class FollowUpRequest (line 926) | class FollowUpRequest(BaseModel):
function generate_followup_stream (line 935) | async def generate_followup_stream(
function debate_followup (line 1019) | async def debate_followup(request: FollowUpRequest):
function execute_search (line 1105) | async def execute_search(request: SearchExecuteRequest):
function get_debate_result (line 1147) | async def get_debate_result(debate_id: str):
function get_agent_logs (line 1172) | async def get_agent_logs(
function get_agent_metrics (line 1202) | async def get_agent_metrics():
function get_debate_trajectory (line 1272) | async def get_debate_trajectory(debate_id: str):
function clear_logs (line 1301) | async def clear_logs():
function get_available_agents (line 1312) | async def get_available_agents():
class DebateHistoryRequest (line 1368) | class DebateHistoryRequest(BaseModel):
class DebateHistoryResponse (line 1374) | class DebateHistoryResponse(BaseModel):
function get_debate_history (line 1383) | async def get_debate_history(
function save_debate_history (line 1438) | async def save_debate_history(
function delete_debate_history (line 1531) | async def delete_debate_history(
FILE: backend/app/api/v1/alpha_mining.py
class MineRequest (line 43) | class MineRequest(BaseModel):
class EvaluateRequest (line 51) | class EvaluateRequest(BaseModel):
class GenerateRequest (line 57) | class GenerateRequest(BaseModel):
class FactorResponse (line 63) | class FactorResponse(BaseModel):
class MineResponse (line 73) | class MineResponse(BaseModel):
class EvaluateResponse (line 81) | class EvaluateResponse(BaseModel):
class GenerateResponse (line 89) | class GenerateResponse(BaseModel):
class TaskStatusResponse (line 97) | class TaskStatusResponse(BaseModel):
class SentimentCompareRequest (line 108) | class SentimentCompareRequest(BaseModel):
class SentimentCompareResponse (line 114) | class SentimentCompareResponse(BaseModel):
class AgentDemoRequest (line 122) | class AgentDemoRequest(BaseModel):
class AgentDemoResponse (line 129) | class AgentDemoResponse(BaseModel):
function _get_alpha_mining_components (line 144) | def _get_alpha_mining_components():
function _run_mining_task (line 179) | async def _run_mining_task(task_id: str, request: MineRequest):
function mine_factors (line 254) | async def mine_factors(
function mine_factors_stream (line 285) | async def mine_factors_stream(request: MineRequest):
function compare_sentiment_effect (line 406) | async def compare_sentiment_effect(request: SentimentCompareRequest):
function agent_alpha_mining_demo (line 481) | async def agent_alpha_mining_demo(request: AgentDemoRequest):
function evaluate_factor (line 566) | async def evaluate_factor(request: EvaluateRequest):
function generate_factors (line 642) | async def generate_factors(request: GenerateRequest):
function get_factors (line 700) | async def get_factors(
function get_task_status (line 727) | async def get_task_status(task_id: str):
function get_operators (line 748) | async def get_operators():
function delete_task (line 775) | async def delete_task(task_id: str):
FILE: backend/app/api/v1/analysis.py
class AnalysisRequest (line 22) | class AnalysisRequest(BaseModel):
class AnalysisResponse (line 28) | class AnalysisResponse(BaseModel):
class AnalysisDetailResponse (line 41) | class AnalysisDetailResponse(BaseModel):
class BatchAnalyzeRequest (line 58) | class BatchAnalyzeRequest(BaseModel):
class BatchAnalyzeResponse (line 65) | class BatchAnalyzeResponse(BaseModel):
function run_analysis_task (line 76) | async def run_analysis_task(news_id: int, db: AsyncSession):
function batch_analyze_news (line 93) | async def batch_analyze_news(
function analyze_news (line 186) | async def analyze_news(
function get_news_analyses (line 251) | async def get_news_analyses(
function get_analysis_detail (line 272) | async def get_analysis_detail(
FILE: backend/app/api/v1/debug.py
class CrawlRequest (line 16) | class CrawlRequest(BaseModel):
class CrawlResponse (line 21) | class CrawlResponse(BaseModel):
function extract_chinese_ratio (line 31) | def extract_chinese_ratio(text: str) -> float:
function clean_text (line 40) | def clean_text(text: str) -> str:
function is_noise_text (line 48) | def is_noise_text(text: str) -> bool:
function extract_content_from_html (line 78) | def extract_content_from_html(html: str, url: str) -> tuple[str, str, di...
function debug_crawl (line 158) | async def debug_crawl(request: CrawlRequest):
function test_sina_crawl (line 194) | async def test_sina_crawl():
FILE: backend/app/api/v1/knowledge_graph.py
class CompanyGraphResponse (line 17) | class CompanyGraphResponse(BaseModel):
class BuildGraphRequest (line 31) | class BuildGraphRequest(BaseModel):
class BuildGraphResponse (line 36) | class BuildGraphResponse(BaseModel):
class UpdateGraphRequest (line 43) | class UpdateGraphRequest(BaseModel):
class GraphStatsResponse (line 49) | class GraphStatsResponse(BaseModel):
function get_company_graph (line 60) | async def get_company_graph(stock_code: str):
function build_company_graph (line 120) | async def build_company_graph(
function update_company_graph (line 201) | async def update_company_graph(
function delete_company_graph (line 298) | async def delete_company_graph(stock_code: str):
function get_graph_stats (line 326) | async def get_graph_stats():
FILE: backend/app/api/v1/llm_config.py
class ModelInfo (line 17) | class ModelInfo(BaseModel):
class ProviderInfo (line 24) | class ProviderInfo(BaseModel):
class LLMConfigResponse (line 33) | class LLMConfigResponse(BaseModel):
function parse_models (line 40) | def parse_models(models_str: str, provider_label: str) -> List[ModelInfo]:
function get_llm_config (line 67) | async def get_llm_config():
FILE: backend/app/api/v1/news.py
class NewsResponse (line 22) | class NewsResponse(BaseModel):
class CrawlRequest (line 37) | class CrawlRequest(BaseModel):
class CrawlResponse (line 44) | class CrawlResponse(BaseModel):
class BatchDeleteRequest (line 53) | class BatchDeleteRequest(BaseModel):
class BatchDeleteResponse (line 58) | class BatchDeleteResponse(BaseModel):
function crawl_and_save_news_sync (line 66) | def crawl_and_save_news_sync(
function crawl_news (line 156) | async def crawl_news(
function refresh_news (line 190) | async def refresh_news(
function get_news_list (line 220) | async def get_news_list(
function get_latest_news (line 252) | async def get_latest_news(
function get_news_detail (line 282) | async def get_news_detail(
function batch_delete_news (line 310) | async def batch_delete_news(
function delete_news (line 361) | async def delete_news(
FILE: backend/app/api/v1/news_v2.py
class NewsDataResponse (line 29) | class NewsDataResponse(BaseModel):
class FetchNewsResponse (line 43) | class FetchNewsResponse(BaseModel):
class ProviderInfoResponse (line 53) | class ProviderInfoResponse(BaseModel):
function fetch_news_realtime (line 63) | async def fetch_news_realtime(
function list_providers (line 147) | async def list_providers():
function test_provider (line 172) | async def test_provider(
FILE: backend/app/api/v1/stocks.py
class StockInfo (line 30) | class StockInfo(BaseModel):
class StockNewsItem (line 43) | class StockNewsItem(BaseModel):
class SentimentTrendPoint (line 55) | class SentimentTrendPoint(BaseModel):
class StockOverview (line 65) | class StockOverview(BaseModel):
class KLineDataPoint (line 77) | class KLineDataPoint(BaseModel):
class StockSearchResult (line 97) | class StockSearchResult(BaseModel):
function search_stocks_realtime (line 107) | async def search_stocks_realtime(
class StockInitResponse (line 148) | class StockInitResponse(BaseModel):
function init_stock_data (line 156) | async def init_stock_data(
function get_stock_count (line 219) | async def get_stock_count(db: AsyncSession = Depends(get_db)):
function get_stock_overview (line 232) | async def get_stock_overview(
function get_stock_news (line 337) | async def get_stock_news(
function delete_stock_news (line 415) | async def delete_stock_news(
function get_sentiment_trend (line 473) | async def get_sentiment_trend(
function get_kline_data (line 544) | async def get_kline_data(
class RealtimeQuote (line 584) | class RealtimeQuote(BaseModel):
function get_realtime_quote (line 600) | async def get_realtime_quote(
function search_stocks_db (line 620) | async def search_stocks_db(
class TargetedCrawlRequest (line 649) | class TargetedCrawlRequest(BaseModel):
class TargetedCrawlResponse (line 655) | class TargetedCrawlResponse(BaseModel):
class TargetedCrawlStatus (line 663) | class TargetedCrawlStatus(BaseModel):
function start_targeted_crawl (line 678) | async def start_targeted_crawl(
function get_targeted_crawl_status (line 755) | async def get_targeted_crawl_status(
function cancel_targeted_crawl (line 826) | async def cancel_targeted_crawl(
function clear_stock_data_cache (line 888) | async def clear_stock_data_cache(
FILE: backend/app/api/v1/tasks.py
class TaskResponse (line 22) | class TaskResponse(BaseModel):
class ColdStartRequest (line 45) | class ColdStartRequest(BaseModel):
class ColdStartResponse (line 52) | class ColdStartResponse(BaseModel):
class RealtimeCrawlRequest (line 60) | class RealtimeCrawlRequest(BaseModel):
class RealtimeCrawlResponse (line 66) | class RealtimeCrawlResponse(BaseModel):
function get_tasks_list (line 75) | async def get_tasks_list(
function get_task_detail (line 111) | async def get_task_detail(
function trigger_cold_start (line 139) | async def trigger_cold_start(
function trigger_realtime_crawl (line 176) | async def trigger_realtime_crawl(
function get_task_stats (line 213) | async def get_task_stats(
function delete_task (line 258) | async def delete_task(
FILE: backend/app/config/__init__.py
class AgentConfig (line 15) | class AgentConfig(BaseModel):
class FlowStep (line 22) | class FlowStep(BaseModel):
class FlowConfig (line 32) | class FlowConfig(BaseModel):
class ModeRules (line 38) | class ModeRules(BaseModel):
class DebateRules (line 51) | class DebateRules(BaseModel):
class DebateModeConfig (line 59) | class DebateModeConfig(BaseModel):
class LLMConfig (line 70) | class LLMConfig(BaseModel):
class DataSourceConfig (line 78) | class DataSourceConfig(BaseModel):
class DataSourcesConfig (line 84) | class DataSourcesConfig(BaseModel):
class OutputConfig (line 90) | class OutputConfig(BaseModel):
class GlobalConfig (line 97) | class GlobalConfig(BaseModel):
class DebateModesConfig (line 104) | class DebateModesConfig(BaseModel):
function load_debate_modes_config (line 113) | def load_debate_modes_config() -> DebateModesConfig:
function get_mode_config (line 130) | def get_mode_config(mode_name: str) -> Optional[DebateModeConfig]:
function get_available_modes (line 136) | def get_available_modes() -> List[Dict[str, Any]]:
function get_default_mode (line 151) | def get_default_mode() -> str:
function get_cached_config (line 161) | def get_cached_config() -> DebateModesConfig:
function reload_config (line 169) | def reload_config() -> DebateModesConfig:
FILE: backend/app/core/config.py
class Settings (line 10) | class Settings(BaseSettings):
method DATABASE_URL (line 36) | def DATABASE_URL(self) -> str:
method SYNC_DATABASE_URL (line 44) | def SYNC_DATABASE_URL(self) -> str:
method REDIS_URL (line 58) | def REDIS_URL(self) -> str:
function get_settings (line 187) | def get_settings() -> Settings:
FILE: backend/app/core/database.py
function get_db (line 14) | async def get_db() -> AsyncGenerator[AsyncSession, None]:
function init_database (line 37) | def init_database():
FILE: backend/app/core/neo4j_client.py
class Neo4jClient (line 15) | class Neo4jClient:
method __init__ (line 18) | def __init__(
method connect (line 39) | def connect(self):
method close (line 57) | def close(self):
method session (line 65) | def session(self):
method execute_query (line 76) | def execute_query(
method execute_write (line 95) | def execute_write(
method is_connected (line 114) | def is_connected(self) -> bool:
method health_check (line 118) | def health_check(self) -> bool:
function get_neo4j_client (line 136) | def get_neo4j_client() -> Neo4jClient:
function close_neo4j_client (line 145) | def close_neo4j_client():
FILE: backend/app/core/redis_client.py
class RedisClient (line 15) | class RedisClient:
method __init__ (line 18) | def __init__(self):
method is_available (line 36) | def is_available(self) -> bool:
method get_json (line 46) | def get_json(self, key: str) -> Optional[Any]:
method set_json (line 59) | def set_json(self, key: str, value: Any, ttl: int = None) -> bool:
method get (line 75) | def get(self, key: str) -> Optional[str]:
method set (line 86) | def set(self, key: str, value: str, ttl: int = None) -> bool:
method delete (line 101) | def delete(self, key: str) -> bool:
method exists (line 113) | def exists(self, key: str) -> bool:
method get_cache_metadata (line 124) | def get_cache_metadata(self, key: str) -> Optional[dict]:
method set_with_metadata (line 139) | def set_with_metadata(self, key: str, value: Any, ttl: int = None) -> ...
method clear_pattern (line 147) | def clear_pattern(self, pattern: str) -> int:
FILE: backend/app/financial/models/news.py
class NewsSentiment (line 22) | class NewsSentiment(str, Enum):
class NewsQueryParams (line 29) | class NewsQueryParams(BaseModel):
class Config (line 67) | class Config:
class NewsData (line 77) | class NewsData(BaseModel):
class Config (line 136) | class Config:
method generate_id (line 155) | def generate_id(url: str) -> str:
method to_legacy_dict (line 159) | def to_legacy_dict(self) -> dict:
FILE: backend/app/financial/models/stock.py
class KlineInterval (line 18) | class KlineInterval(str, Enum):
class AdjustType (line 30) | class AdjustType(str, Enum):
class StockQueryParams (line 37) | class StockQueryParams(BaseModel):
class Config (line 63) | class Config:
class StockPriceData (line 74) | class StockPriceData(BaseModel):
class Config (line 94) | class Config:
method to_legacy_dict (line 99) | def to_legacy_dict(self) -> dict:
class StockRealtimeData (line 122) | class StockRealtimeData(BaseModel):
class StockFinancialData (line 138) | class StockFinancialData(BaseModel):
FILE: backend/app/financial/providers/base.py
class ProviderInfo (line 25) | class ProviderInfo:
class BaseFetcher (line 47) | class BaseFetcher(ABC, Generic[QueryT, DataT]):
method __init__ (line 74) | def __init__(self):
method transform_query (line 80) | def transform_query(self, params: QueryT) -> Dict[str, Any]:
method extract_data (line 97) | async def extract_data(self, query: Dict[str, Any]) -> Any:
method transform_data (line 116) | def transform_data(self, raw_data: Any, query: QueryT) -> List[DataT]:
method fetch (line 129) | async def fetch(self, params: QueryT) -> List[DataT]:
method fetch_sync (line 159) | def fetch_sync(self, params: QueryT) -> List[DataT]:
class BaseProvider (line 173) | class BaseProvider(ABC):
method info (line 192) | def info(self) -> ProviderInfo:
method fetchers (line 198) | def fetchers(self) -> Dict[str, Type[BaseFetcher]]:
method get_fetcher (line 208) | def get_fetcher(self, data_type: str) -> Optional[BaseFetcher]:
method supports (line 223) | def supports(self, data_type: str) -> bool:
method __repr__ (line 235) | def __repr__(self) -> str:
FILE: backend/app/financial/providers/eastmoney/fetchers/news.py
class EastmoneyNewsFetcher (line 19) | class EastmoneyNewsFetcher(BaseFetcher):
method transform_query (line 36) | def transform_query(self, params: NewsQueryParams) -> Dict[str, Any]:
method extract_data (line 45) | def extract_data(self, query: Dict[str, Any]) -> List[Dict[str, Any]]:
method transform_data (line 81) | def transform_data(
method _extract_news_links (line 125) | def _extract_news_links(self, soup: BeautifulSoup) -> List[Dict[str, s...
method _fetch_news_detail (line 148) | def _fetch_news_detail(self, link_info: Dict[str, str]) -> Optional[Di...
method _extract_content (line 177) | def _extract_content(self, soup: BeautifulSoup) -> str:
method _extract_publish_time (line 200) | def _extract_publish_time(self, soup: BeautifulSoup) -> datetime:
method _parse_time_string (line 213) | def _parse_time_string(self, time_str: str) -> datetime:
method _extract_author (line 223) | def _extract_author(self, soup: BeautifulSoup) -> Optional[str]:
method _extract_stock_codes (line 235) | def _extract_stock_codes(self, text: str) -> List[str]:
method _clean_text (line 257) | def _clean_text(self, text: str) -> str:
FILE: backend/app/financial/providers/eastmoney/provider.py
class EastmoneyProvider (line 10) | class EastmoneyProvider(BaseProvider):
method info (line 19) | def info(self) -> ProviderInfo:
method fetchers (line 30) | def fetchers(self) -> Dict[str, Type[BaseFetcher]]:
FILE: backend/app/financial/providers/nbd/fetchers/news.py
class NbdNewsFetcher (line 19) | class NbdNewsFetcher(BaseFetcher):
method transform_query (line 36) | def transform_query(self, params: NewsQueryParams) -> Dict[str, Any]:
method extract_data (line 45) | def extract_data(self, query: Dict[str, Any]) -> List[Dict[str, Any]]:
method transform_data (line 76) | def transform_data(
method _extract_news_links (line 120) | def _extract_news_links(self, soup: BeautifulSoup) -> List[Dict[str, s...
method _fetch_news_detail (line 142) | def _fetch_news_detail(self, link_info: Dict[str, str]) -> Optional[Di...
method _extract_content (line 171) | def _extract_content(self, soup: BeautifulSoup) -> str:
method _extract_publish_time (line 203) | def _extract_publish_time(self, soup: BeautifulSoup) -> datetime:
method _parse_time_string (line 214) | def _parse_time_string(self, time_str: str) -> datetime:
method _extract_author (line 224) | def _extract_author(self, soup: BeautifulSoup) -> Optional[str]:
method _extract_stock_codes (line 234) | def _extract_stock_codes(self, text: str) -> List[str]:
method _clean_text (line 256) | def _clean_text(self, text: str) -> str:
FILE: backend/app/financial/providers/nbd/provider.py
class NbdProvider (line 10) | class NbdProvider(BaseProvider):
method info (line 19) | def info(self) -> ProviderInfo:
method fetchers (line 30) | def fetchers(self) -> Dict[str, Type[BaseFetcher]]:
FILE: backend/app/financial/providers/netease/fetchers/news.py
class NeteaseNewsFetcher (line 19) | class NeteaseNewsFetcher(BaseFetcher):
method transform_query (line 36) | def transform_query(self, params: NewsQueryParams) -> Dict[str, Any]:
method extract_data (line 45) | def extract_data(self, query: Dict[str, Any]) -> List[Dict[str, Any]]:
method transform_data (line 81) | def transform_data(
method _extract_news_links (line 125) | def _extract_news_links(self, soup: BeautifulSoup) -> List[Dict[str, s...
method _fetch_news_detail (line 148) | def _fetch_news_detail(self, link_info: Dict[str, str]) -> Optional[Di...
method _extract_content (line 177) | def _extract_content(self, soup: BeautifulSoup) -> str:
method _extract_publish_time (line 200) | def _extract_publish_time(self, soup: BeautifulSoup) -> datetime:
method _parse_time_string (line 211) | def _parse_time_string(self, time_str: str) -> datetime:
method _extract_author (line 221) | def _extract_author(self, soup: BeautifulSoup) -> Optional[str]:
method _extract_stock_codes (line 233) | def _extract_stock_codes(self, text: str) -> List[str]:
method _clean_text (line 255) | def _clean_text(self, text: str) -> str:
FILE: backend/app/financial/providers/netease/provider.py
class NeteaseProvider (line 10) | class NeteaseProvider(BaseProvider):
method info (line 19) | def info(self) -> ProviderInfo:
method fetchers (line 30) | def fetchers(self) -> Dict[str, Type[BaseFetcher]]:
FILE: backend/app/financial/providers/sina/fetchers/news.py
class SinaNewsFetcher (line 33) | class SinaNewsFetcher(BaseFetcher[NewsQueryParams, NewsData]):
method __init__ (line 68) | def __init__(self):
method _get_session (line 72) | def _get_session(self):
method transform_query (line 82) | def transform_query(self, params: NewsQueryParams) -> Dict[str, Any]:
method extract_data (line 112) | async def extract_data(self, query: Dict[str, Any]) -> List[Dict]:
method _crawl_page (line 144) | async def _crawl_page(self, url: str, max_items: int) -> List[Dict]:
method _fetch_page_sync (line 185) | def _fetch_page_sync(self, url: str):
method _extract_news_links (line 196) | def _extract_news_links(self, soup: BeautifulSoup) -> List[str]:
method _crawl_news_detail (line 211) | async def _crawl_news_detail(self, url: str) -> Optional[Dict]:
method transform_data (line 256) | def transform_data(
method _normalize_symbol (line 295) | def _normalize_symbol(self, code: str) -> str:
method _extract_title (line 305) | def _extract_title(self, soup: BeautifulSoup) -> Optional[str]:
method _extract_meta (line 319) | def _extract_meta(self, soup: BeautifulSoup) -> tuple:
method _extract_date (line 335) | def _extract_date(self, soup: BeautifulSoup) -> Optional[datetime]:
method _parse_date (line 349) | def _parse_date(self, date_text: str) -> Optional[datetime]:
method _extract_stock_codes (line 364) | def _extract_stock_codes(self, soup: BeautifulSoup) -> List[str]:
method _extract_content (line 375) | def _extract_content(self, soup: BeautifulSoup) -> str:
method _is_noise_text (line 417) | def _is_noise_text(self, text: str) -> bool:
method _extract_chinese_ratio (line 425) | def _extract_chinese_ratio(self, text: str) -> float:
FILE: backend/app/financial/providers/sina/provider.py
class SinaProvider (line 10) | class SinaProvider(BaseProvider):
method info (line 19) | def info(self) -> ProviderInfo:
method fetchers (line 30) | def fetchers(self) -> Dict[str, Type[BaseFetcher]]:
FILE: backend/app/financial/providers/tencent/fetchers/news.py
class TencentNewsFetcher (line 22) | class TencentNewsFetcher(BaseFetcher):
method transform_query (line 39) | def transform_query(self, params: NewsQueryParams) -> Dict[str, Any]:
method extract_data (line 50) | def extract_data(self, query: Dict[str, Any]) -> List[Dict[str, Any]]:
method transform_data (line 88) | def transform_data(
method _extract_news_links (line 138) | def _extract_news_links(self, soup: BeautifulSoup) -> List[Dict[str, s...
method _fetch_news_detail (line 157) | def _fetch_news_detail(self, link_info: Dict[str, str]) -> Optional[Di...
method _extract_content (line 186) | def _extract_content(self, soup: BeautifulSoup) -> str:
method _extract_publish_time (line 209) | def _extract_publish_time(self, soup: BeautifulSoup) -> datetime:
method _parse_time_string (line 233) | def _parse_time_string(self, time_str: str) -> datetime:
method _extract_author (line 255) | def _extract_author(self, soup: BeautifulSoup) -> Optional[str]:
method _extract_stock_codes (line 266) | def _extract_stock_codes(self, text: str) -> List[str]:
method _clean_text (line 288) | def _clean_text(self, text: str) -> str:
FILE: backend/app/financial/providers/tencent/provider.py
class TencentProvider (line 10) | class TencentProvider(BaseProvider):
method info (line 19) | def info(self) -> ProviderInfo:
method fetchers (line 30) | def fetchers(self) -> Dict[str, Type[BaseFetcher]]:
FILE: backend/app/financial/providers/yicai/fetchers/news.py
class YicaiNewsFetcher (line 19) | class YicaiNewsFetcher(BaseFetcher):
method transform_query (line 36) | def transform_query(self, params: NewsQueryParams) -> Dict[str, Any]:
method extract_data (line 45) | def extract_data(self, query: Dict[str, Any]) -> List[Dict[str, Any]]:
method transform_data (line 76) | def transform_data(
method _extract_news_links (line 120) | def _extract_news_links(self, soup: BeautifulSoup) -> List[Dict[str, s...
method _fetch_news_detail (line 142) | def _fetch_news_detail(self, link_info: Dict[str, str]) -> Optional[Di...
method _extract_content (line 171) | def _extract_content(self, soup: BeautifulSoup) -> str:
method _extract_publish_time (line 194) | def _extract_publish_time(self, soup: BeautifulSoup) -> datetime:
method _parse_time_string (line 207) | def _parse_time_string(self, time_str: str) -> datetime:
method _extract_author (line 217) | def _extract_author(self, soup: BeautifulSoup) -> Optional[str]:
method _extract_stock_codes (line 227) | def _extract_stock_codes(self, text: str) -> List[str]:
method _clean_text (line 249) | def _clean_text(self, text: str) -> str:
FILE: backend/app/financial/providers/yicai/provider.py
class YicaiProvider (line 10) | class YicaiProvider(BaseProvider):
method info (line 19) | def info(self) -> ProviderInfo:
method fetchers (line 30) | def fetchers(self) -> Dict[str, Type[BaseFetcher]]:
FILE: backend/app/financial/registry.py
class ProviderNotFoundError (line 21) | class ProviderNotFoundError(Exception):
class FetcherNotFoundError (line 26) | class FetcherNotFoundError(Exception):
class ProviderRegistry (line 31) | class ProviderRegistry:
method __new__ (line 54) | def __new__(cls):
method register (line 63) | def register(self, provider: BaseProvider) -> None:
method unregister (line 100) | def unregister(self, name: str) -> bool:
method get_provider (line 117) | def get_provider(self, name: str) -> Optional[BaseProvider]:
method get_fetcher (line 129) | def get_fetcher(
method list_providers (line 184) | def list_providers(self) -> List[str]:
method get_providers_for_type (line 193) | def get_providers_for_type(self, data_type: str) -> List[str]:
method get_all_data_types (line 208) | def get_all_data_types(self) -> List[str]:
method clear (line 220) | def clear(self) -> None:
method __repr__ (line 226) | def __repr__(self) -> str:
function get_registry (line 234) | def get_registry() -> ProviderRegistry:
function reset_registry (line 247) | def reset_registry() -> ProviderRegistry:
FILE: backend/app/financial/tools.py
class FinancialNewsTool (line 28) | class FinancialNewsTool(BaseTool):
method __init__ (line 40) | def __init__(self):
method _setup_parameters (line 49) | def _setup_parameters(self):
method aexecute (line 53) | async def aexecute(
method execute (line 119) | def execute(
class StockPriceTool (line 139) | class StockPriceTool(BaseTool):
method __init__ (line 149) | def __init__(self):
method _setup_parameters (line 158) | def _setup_parameters(self):
method aexecute (line 162) | async def aexecute(
method execute (line 234) | def execute(
function setup_default_providers (line 255) | def setup_default_providers():
FILE: backend/app/knowledge/graph_models.py
class NodeType (line 11) | class NodeType(str, Enum):
class RelationType (line 23) | class RelationType(str, Enum):
class CompanyNode (line 36) | class CompanyNode(BaseModel):
class NameVariantNode (line 49) | class NameVariantNode(BaseModel):
class BusinessNode (line 56) | class BusinessNode(BaseModel):
class IndustryNode (line 68) | class IndustryNode(BaseModel):
class ProductNode (line 76) | class ProductNode(BaseModel):
class KeywordNode (line 85) | class KeywordNode(BaseModel):
class ConceptNode (line 93) | class ConceptNode(BaseModel):
class CompanyKnowledgeGraph (line 101) | class CompanyKnowledgeGraph(BaseModel):
class SearchKeywordSet (line 112) | class SearchKeywordSet(BaseModel):
method get_all_keywords (line 135) | def get_all_keywords(self) -> List[str]:
method generate_search_queries (line 146) | def generate_search_queries(self, max_queries: int = 10) -> List[str]:
FILE: backend/app/knowledge/graph_service.py
class KnowledgeGraphService (line 27) | class KnowledgeGraphService:
method __init__ (line 30) | def __init__(self):
method _ensure_constraints (line 34) | def _ensure_constraints(self):
method create_or_update_company (line 54) | def create_or_update_company(self, company: CompanyNode) -> bool:
method get_company (line 89) | def get_company(self, stock_code: str) -> Optional[Dict[str, Any]]:
method add_name_variants (line 101) | def add_name_variants(
method add_business (line 144) | def add_business(
method stop_business (line 175) | def stop_business(
method add_keywords (line 206) | def add_keywords(
method add_concepts (line 243) | def add_concepts(
method build_company_graph (line 279) | def build_company_graph(self, graph: CompanyKnowledgeGraph) -> bool:
method _add_industry (line 324) | def _add_industry(self, stock_code: str, industry: IndustryNode) -> bool:
method _add_product (line 346) | def _add_product(self, stock_code: str, product: ProductNode) -> bool:
method get_company_graph (line 371) | def get_company_graph(self, stock_code: str) -> Optional[CompanyKnowle...
method get_search_keywords (line 425) | def get_search_keywords(self, stock_code: str) -> Optional[SearchKeywo...
method update_from_news (line 457) | def update_from_news(
method get_graph_stats (line 520) | def get_graph_stats(self, stock_code: str) -> Dict[str, int]:
method delete_company_graph (line 548) | def delete_company_graph(self, stock_code: str) -> bool:
method list_all_companies (line 564) | def list_all_companies(self) -> List[Dict[str, str]]:
function get_graph_service (line 582) | def get_graph_service() -> KnowledgeGraphService:
FILE: backend/app/knowledge/knowledge_extractor.py
class KnowledgeExtractorAgent (line 26) | class KnowledgeExtractorAgent(Agent):
method __init__ (line 32) | def __init__(self, llm_provider=None, organization_id: str = "finnews"):
method extract_from_akshare (line 50) | async def extract_from_akshare(
method extract_from_news (line 206) | async def extract_from_news(
method extract_from_document (line 305) | async def extract_from_document(
class AkshareKnowledgeExtractor (line 387) | class AkshareKnowledgeExtractor:
method extract_company_info (line 393) | def extract_company_info(stock_code: str) -> Optional[Dict[str, Any]]:
method generate_search_keywords (line 490) | def generate_search_keywords(
method build_simple_graph_from_info (line 583) | def build_simple_graph_from_info(
class NewsKnowledgeExtractor (line 673) | class NewsKnowledgeExtractor:
method __init__ (line 678) | def __init__(self, extractor_agent: KnowledgeExtractorAgent):
method extract_business_changes (line 681) | async def extract_business_changes(
function create_knowledge_extractor (line 702) | def create_knowledge_extractor(llm_provider=None) -> KnowledgeExtractorA...
FILE: backend/app/knowledge/parallel_search.py
class ParallelSearchStrategy (line 17) | class ParallelSearchStrategy:
method __init__ (line 23) | def __init__(self, max_workers: int = 5):
method search_with_multiple_keywords (line 32) | def search_with_multiple_keywords(
method _search_single_query (line 93) | def _search_single_query(
method search_async (line 128) | async def search_async(
function create_parallel_search (line 157) | def create_parallel_search(max_workers: int = 5) -> ParallelSearchStrategy:
FILE: backend/app/main.py
class DocsCSPMiddleware (line 25) | class DocsCSPMiddleware(BaseHTTPMiddleware):
method dispatch (line 27) | async def dispatch(self, request: Request, call_next):
function lifespan (line 69) | async def lifespan(app: FastAPI):
function validation_exception_handler (line 147) | async def validation_exception_handler(request: Request, exc: RequestVal...
function global_exception_handler (line 172) | async def global_exception_handler(request, exc):
function root (line 186) | async def root():
function health_check (line 200) | async def health_check():
function custom_swagger_ui_html (line 212) | async def custom_swagger_ui_html():
function swagger_ui_redirect (line 226) | async def swagger_ui_redirect():
function redoc_html (line 234) | async def redoc_html():
function chrome_devtools_config (line 246) | async def chrome_devtools_config():
FILE: backend/app/models/analysis.py
class Analysis (line 11) | class Analysis(Base):
method __repr__ (line 49) | def __repr__(self):
method to_dict (line 52) | def to_dict(self):
FILE: backend/app/models/crawl_task.py
class CrawlMode (line 12) | class CrawlMode(str, Enum):
class TaskStatus (line 19) | class TaskStatus(str, Enum):
class CrawlTask (line 28) | class CrawlTask(Base):
method __repr__ (line 64) | def __repr__(self):
method to_dict (line 67) | def to_dict(self):
FILE: backend/app/models/database.py
function get_async_session (line 47) | async def get_async_session() -> AsyncGenerator[AsyncSession, None]:
function get_sync_session (line 65) | def get_sync_session() -> Session:
function init_db (line 83) | def init_db():
FILE: backend/app/models/debate_history.py
class DebateHistory (line 11) | class DebateHistory(Base):
method __repr__ (line 42) | def __repr__(self):
method to_dict (line 45) | def to_dict(self):
FILE: backend/app/models/news.py
class News (line 12) | class News(Base):
method __repr__ (line 56) | def __repr__(self):
method to_dict (line 59) | def to_dict(self, include_html: bool = False):
FILE: backend/app/models/stock.py
class Stock (line 10) | class Stock(Base):
method __repr__ (line 39) | def __repr__(self):
method to_dict (line 42) | def to_dict(self):
FILE: backend/app/scripts/init_stocks.py
class Stock (line 69) | class Stock(Base):
function get_fallback_stocks (line 87) | def get_fallback_stocks() -> list:
function fetch_all_stocks (line 113) | async def fetch_all_stocks() -> list:
function init_stocks_to_db (line 205) | async def init_stocks_to_db():
function get_stock_count (line 267) | async def get_stock_count():
function main (line 280) | async def main():
FILE: backend/app/services/analysis_service.py
class AnalysisService (line 22) | class AnalysisService:
method __init__ (line 28) | def __init__(self):
method analyze_news (line 35) | async def analyze_news(
method get_analysis_by_id (line 190) | async def get_analysis_by_id(
method get_analyses_by_news_id (line 219) | async def get_analyses_by_news_id(
function get_analysis_service (line 255) | def get_analysis_service() -> AnalysisService:
FILE: backend/app/services/embedding_service.py
class EmbeddingService (line 18) | class EmbeddingService:
method __init__ (line 25) | def __init__(
method _get_cache_key (line 84) | def _get_cache_key(self, text: str) -> str:
method _get_from_cache (line 90) | def _get_from_cache(self, text: str) -> Optional[List[float]]:
method _save_to_cache (line 105) | def _save_to_cache(self, text: str, embedding: List[float]):
method embed_text (line 120) | def embed_text(self, text: str) -> List[float]:
method embed_batch (line 160) | def embed_batch(self, texts: List[str]) -> List[List[float]]:
method aembed_text (line 211) | async def aembed_text(self, text: str) -> List[float]:
method aembed_batch (line 246) | async def aembed_batch(self, texts: List[str]) -> List[List[float]]:
function get_embedding_service (line 301) | def get_embedding_service() -> EmbeddingService:
FILE: backend/app/services/llm_service.py
class LLMService (line 14) | class LLMService:
method __init__ (line 20) | def __init__(
method _create_provider (line 84) | def _create_provider(self) -> Union[LiteLLMProvider, BailianProvider]:
method generate (line 130) | def generate(
method analyze_sentiment (line 170) | def analyze_sentiment(self, text: str) -> Dict[str, Any]:
method summarize (line 228) | def summarize(self, text: str, max_length: int = 200) -> str:
function get_llm_provider (line 260) | def get_llm_provider(
function get_llm_service (line 287) | def get_llm_service() -> LLMService:
function create_custom_llm_provider (line 300) | def create_custom_llm_provider(
FILE: backend/app/services/stock_data_service.py
class StockDataService (line 22) | class StockDataService:
method __init__ (line 48) | def __init__(self):
method _normalize_code (line 51) | def _normalize_code(self, stock_code: str) -> str:
method _get_symbol (line 61) | def _get_symbol(self, stock_code: str) -> str:
method _is_cache_valid (line 68) | def _is_cache_valid(self, key: str, ttl: int = None) -> bool:
method _get_cached (line 78) | def _get_cached(self, key: str, ttl: int = None) -> Optional[Any]:
method _set_cache (line 87) | def _set_cache(self, key: str, data: Any):
method clear_cache (line 91) | def clear_cache(self, pattern: str = None):
method get_kline_data (line 107) | async def get_kline_data(
method _fetch_daily_kline (line 175) | async def _fetch_daily_kline(
method _fetch_minute_kline (line 257) | async def _fetch_minute_kline(
method get_realtime_quote (line 329) | async def get_realtime_quote(self, stock_code: str) -> Optional[Dict[s...
method search_stocks (line 383) | async def search_stocks(
method _get_mock_stock_list (line 452) | def _get_mock_stock_list(self, keyword: str, limit: int) -> List[Dict[...
method get_stock_info (line 474) | async def get_stock_info(self, stock_code: str) -> Optional[Dict[str, ...
method _generate_mock_kline (line 504) | def _generate_mock_kline(self, stock_code: str, days: int) -> List[Dic...
method get_financial_indicators (line 561) | async def get_financial_indicators(self, stock_code: str) -> Optional[...
method _safe_float (line 642) | def _safe_float(self, value, default=None) -> Optional[float]:
method _get_mock_financial_indicators (line 651) | def _get_mock_financial_indicators(self, stock_code: str) -> Dict[str,...
method get_fund_flow (line 667) | async def get_fund_flow(self, stock_code: str, days: int = 5) -> Optio...
method _get_mock_fund_flow (line 753) | def _get_mock_fund_flow(self, stock_code: str) -> Dict[str, Any]:
method get_debate_context (line 766) | async def get_debate_context(self, stock_code: str) -> Dict[str, Any]:
FILE: backend/app/storage/vector_storage.py
class VectorStorage (line 16) | class VectorStorage:
method __init__ (line 23) | def __init__(
method _call_add_async (line 46) | def _call_add_async(self, records: List[VectorRecord], timeout: int = ...
method connect (line 61) | def connect(self):
method create_collection (line 66) | def create_collection(self, drop_existing: bool = False):
method load_collection (line 78) | def load_collection(self):
method store_embedding (line 82) | def store_embedding(
method store_embeddings_batch (line 97) | def store_embeddings_batch(
method search_similar (line 115) | def search_similar(
method delete_by_news_id (line 153) | def delete_by_news_id(self, news_id: int):
method verify_insert (line 157) | def verify_insert(self, news_id: int, wait_for_flush: bool = True) -> ...
method get_stats (line 172) | def get_stats(self) -> Dict[str, Any]:
method disconnect (line 205) | def disconnect(self):
method collection (line 210) | def collection(self):
function get_vector_storage (line 219) | def get_vector_storage() -> VectorStorage:
FILE: backend/app/tasks/crawl_tasks.py
function clean_text_for_db (line 36) | def clean_text_for_db(text: str) -> str:
function get_crawler_tool (line 56) | def get_crawler_tool(source: str):
function get_sync_db_session (line 86) | def get_sync_db_session():
function realtime_crawl_task (line 93) | def realtime_crawl_task(self, source: str = "sina", force_refresh: bool ...
function cold_start_crawl_task (line 300) | def cold_start_crawl_task(
function targeted_stock_crawl_task (line 445) | def targeted_stock_crawl_task(
function build_knowledge_graph_task (line 982) | def build_knowledge_graph_task(self, stock_code: str, stock_name: str):
FILE: backend/app/tools/bochaai_search.py
class SearchResult (line 19) | class SearchResult:
class BochaAISearchTool (line 28) | class BochaAISearchTool:
method __init__ (line 34) | def __init__(self, api_key: Optional[str] = None, endpoint: Optional[s...
method is_available (line 51) | def is_available(self) -> bool:
method search (line 55) | def search(
method search_stock_news (line 156) | def search_stock_news(
FILE: backend/app/tools/caijing_crawler.py
class CaijingCrawlerTool (line 16) | class CaijingCrawlerTool(BaseCrawler):
method __init__ (line 27) | def __init__(self):
method crawl (line 33) | def crawl(self, start_page: int = 1, end_page: int = 1) -> List[NewsIt...
method _crawl_page (line 57) | def _crawl_page(self, page: int) -> List[NewsItem]:
method _extract_news_links (line 93) | def _extract_news_links(self, soup: BeautifulSoup) -> List[dict]:
method _extract_news_item (line 158) | def _extract_news_item(self, link_info: dict) -> Optional[NewsItem]:
method _extract_content (line 193) | def _extract_content(self, soup: BeautifulSoup) -> str:
method _extract_publish_time (line 216) | def _extract_publish_time(self, soup: BeautifulSoup) -> Optional[datet...
method _parse_time_string (line 228) | def _parse_time_string(self, time_str: str) -> datetime:
method _extract_author (line 248) | def _extract_author(self, soup: BeautifulSoup) -> Optional[str]:
FILE: backend/app/tools/crawler_base.py
class NewsItem (line 22) | class NewsItem:
method to_dict (line 35) | def to_dict(self) -> Dict[str, Any]:
class BaseCrawler (line 51) | class BaseCrawler(BaseTool):
method __init__ (line 73) | def __init__(self, name: str = "base_crawler", description: str = "Bas...
method _fetch_page (line 91) | def _fetch_page(self, url: str) -> requests.Response:
method _parse_html (line 162) | def _parse_html(self, html: str) -> BeautifulSoup:
method _extract_chinese_ratio (line 174) | def _extract_chinese_ratio(self, text: str) -> float:
method _clean_text (line 191) | def _clean_text(self, text: str) -> str:
method _extract_article_content (line 210) | def _extract_article_content(self, soup: BeautifulSoup, selectors: Lis...
method _is_stock_related_by_url (line 276) | def _is_stock_related_by_url(self, url: str) -> bool:
method _is_stock_related_by_title (line 289) | def _is_stock_related_by_title(self, title: str) -> bool:
method _filter_stock_news (line 301) | def _filter_stock_news(self, news_list: List[NewsItem]) -> List[NewsIt...
method crawl (line 351) | def crawl(self, start_page: int = 1, end_page: int = 1) -> List[NewsIt...
method _setup_parameters (line 364) | def _setup_parameters(self):
method execute (line 368) | def execute(self, **kwargs) -> Dict[str, Any]:
method aexecute (line 392) | async def aexecute(self, **kwargs) -> Dict[str, Any]:
FILE: backend/app/tools/crawler_enhanced.py
class Article (line 122) | class Article:
method to_markdown (line 143) | def to_markdown(self, include_title: bool = True, include_meta: bool =...
method to_llm_message (line 171) | def to_llm_message(self) -> List[Dict[str, Any]]:
method to_dict (line 200) | def to_dict(self) -> Dict[str, Any]:
class ContentExtractor (line 221) | class ContentExtractor:
method extract_with_readability (line 228) | def extract_with_readability(html: str) -> Optional[Article]:
method extract_with_selectors (line 251) | def extract_with_selectors(soup: BeautifulSoup, config: dict) -> Optio...
method extract_heuristic (line 292) | def extract_heuristic(soup: BeautifulSoup) -> Optional[Article]:
method extract (line 348) | def extract(cls, html: str, url: str = "", config: dict = None) -> Art...
class JinaReaderEngine (line 388) | class JinaReaderEngine:
method __init__ (line 396) | def __init__(self, api_key: Optional[str] = None):
method crawl (line 399) | def crawl(self, url: str, return_format: str = "html") -> Optional[str]:
class PlaywrightEngine (line 426) | class PlaywrightEngine:
method __init__ (line 432) | def __init__(self, headless: bool = True):
method crawl (line 435) | def crawl(self, url: str, wait_selectors: List[str] = None,
class RequestsEngine (line 488) | class RequestsEngine:
method __init__ (line 500) | def __init__(self, timeout: int = 20):
method crawl (line 506) | def crawl(self, url: str, headers: dict = None, cookies: dict = None) ...
class CrawlCache (line 525) | class CrawlCache:
method __init__ (line 530) | def __init__(self, cache_dir: str = ".crawl_cache", ttl_hours: int = 24):
method _key (line 535) | def _key(self, url: str) -> str:
method get (line 538) | def get(self, url: str) -> Optional[str]:
method set (line 558) | def set(self, url: str, html: str):
class EnhancedCrawler (line 576) | class EnhancedCrawler:
method __init__ (line 582) | def __init__(
method _get_site_config (line 599) | def _get_site_config(self, url: str) -> dict:
method _evaluate_quality (line 609) | def _evaluate_quality(self, article: Article) -> float:
method crawl (line 648) | def crawl(
method crawl_batch (line 756) | def crawl_batch(
function get_crawler (line 802) | def get_crawler() -> EnhancedCrawler:
function crawl_url (line 810) | def crawl_url(url: str, engine: str = 'auto') -> Article:
function crawl_urls (line 815) | def crawl_urls(urls: List[str], engine: str = 'auto') -> List[Article]:
FILE: backend/app/tools/dynamic_crawler_example.py
class DynamicCrawlerExample (line 24) | class DynamicCrawlerExample(BaseCrawler):
method __init__ (line 34) | def __init__(self):
method _init_driver (line 41) | def _init_driver(self):
method _close_driver (line 56) | def _close_driver(self):
method crawl (line 62) | def crawl(self, start_page: int = 1, end_page: int = 1) -> List[NewsIt...
method _crawl_with_selenium (line 89) | def _crawl_with_selenium(self) -> List[NewsItem]:
method _extract_news_links_from_selenium (line 151) | def _extract_news_links_from_selenium(self) -> List[dict]:
method _extract_news_item (line 174) | def _extract_news_item(self, link_info: dict) -> Optional[NewsItem]:
FILE: backend/app/tools/eastmoney_crawler.py
class EastmoneyCrawlerTool (line 16) | class EastmoneyCrawlerTool(BaseCrawler):
method __init__ (line 26) | def __init__(self):
method crawl (line 32) | def crawl(self, start_page: int = 1, end_page: int = 1) -> List[NewsIt...
method _crawl_page (line 56) | def _crawl_page(self, page: int) -> List[NewsItem]:
method _extract_news_links (line 92) | def _extract_news_links(self, soup: BeautifulSoup) -> List[dict]:
method _extract_news_item (line 164) | def _extract_news_item(self, link_info: dict) -> Optional[NewsItem]:
method _extract_content (line 199) | def _extract_content(self, soup: BeautifulSoup) -> str:
method _extract_publish_time (line 222) | def _extract_publish_time(self, soup: BeautifulSoup) -> Optional[datet...
method _parse_time_string (line 236) | def _parse_time_string(self, time_str: str) -> datetime:
method _extract_author (line 256) | def _extract_author(self, soup: BeautifulSoup) -> Optional[str]:
FILE: backend/app/tools/eeo_crawler.py
class EeoCrawlerTool (line 17) | class EeoCrawlerTool(BaseCrawler):
method __init__ (line 33) | def __init__(self):
method crawl (line 39) | def crawl(self, start_page: int = 1, end_page: int = 1) -> List[NewsIt...
method _fetch_api_news (line 63) | def _fetch_api_news(self, page: int = 0, prev_uuid: str = "", prev_pub...
method _crawl_page (line 158) | def _crawl_page(self, page: int) -> List[NewsItem]:
method _parse_api_news_item (line 195) | def _parse_api_news_item(self, news_data: dict) -> Optional[NewsItem]:
method _fetch_news_content (line 244) | def _fetch_news_content(self, url: str) -> tuple:
method _crawl_page_html (line 267) | def _crawl_page_html(self) -> List[NewsItem]:
method _extract_news_links (line 297) | def _extract_news_links(self, soup: BeautifulSoup) -> List[dict]:
method _extract_news_item (line 361) | def _extract_news_item(self, link_info: dict) -> Optional[NewsItem]:
method _extract_content (line 396) | def _extract_content(self, soup: BeautifulSoup) -> str:
method _extract_publish_time (line 443) | def _extract_publish_time(self, soup: BeautifulSoup) -> Optional[datet...
method _parse_time_string (line 455) | def _parse_time_string(self, time_str: str) -> datetime:
method _extract_author (line 475) | def _extract_author(self, soup: BeautifulSoup) -> Optional[str]:
FILE: backend/app/tools/interactive_crawler.py
class InteractiveCrawler (line 39) | class InteractiveCrawler:
method __init__ (line 42) | def __init__(self, timeout: int = 15):
method _is_captcha_page (line 70) | def _is_captcha_page(self, html_content: str, soup: BeautifulSoup = No...
method search_on_bing (line 90) | def search_on_bing(
method search_on_baidu (line 193) | def search_on_baidu(
method search_on_baidu_news (line 302) | def search_on_baidu_news(
method search_on_sogou (line 420) | def search_on_sogou(
method search_on_360 (line 516) | def search_on_360(
method interactive_search (line 612) | def interactive_search(
method crawl_page (line 711) | def crawl_page(self, url: str) -> Optional[Dict[str, Any]]:
method crawl_search_results (line 788) | def crawl_search_results(
function create_interactive_crawler (line 839) | def create_interactive_crawler(headless: bool = True, **kwargs) -> Inter...
function search_and_crawl (line 844) | def search_and_crawl(
FILE: backend/app/tools/jingji21_crawler.py
class Jingji21CrawlerTool (line 16) | class Jingji21CrawlerTool(BaseCrawler):
method __init__ (line 27) | def __init__(self):
method crawl (line 33) | def crawl(self, start_page: int = 1, end_page: int = 1) -> List[NewsIt...
method _crawl_page (line 57) | def _crawl_page(self, page: int) -> List[NewsItem]:
method _extract_news_links (line 90) | def _extract_news_links(self, soup: BeautifulSoup) -> List[dict]:
method _extract_news_item (line 112) | def _extract_news_item(self, link_info: dict) -> Optional[NewsItem]:
method _extract_content (line 163) | def _extract_content(self, soup: BeautifulSoup) -> str:
method _extract_publish_time (line 186) | def _extract_publish_time(self, soup: BeautifulSoup) -> Optional[datet...
method _parse_time_string (line 198) | def _parse_time_string(self, time_str: str) -> datetime:
method _extract_author (line 218) | def _extract_author(self, soup: BeautifulSoup) -> Optional[str]:
FILE: backend/app/tools/jwview_crawler.py
class JwviewCrawlerTool (line 16) | class JwviewCrawlerTool(BaseCrawler):
method __init__ (line 27) | def __init__(self):
method crawl (line 33) | def crawl(self, start_page: int = 1, end_page: int = 1) -> List[NewsIt...
method _crawl_page (line 57) | def _crawl_page(self, page: int) -> List[NewsItem]:
method _extract_news_links (line 93) | def _extract_news_links(self, soup: BeautifulSoup) -> List[dict]:
method _extract_news_item (line 119) | def _extract_news_item(self, link_info: dict) -> Optional[NewsItem]:
method _extract_content (line 154) | def _extract_content(self, soup: BeautifulSoup) -> str:
method _extract_publish_time (line 177) | def _extract_publish_time(self, soup: BeautifulSoup) -> Optional[datet...
method _parse_time_string (line 189) | def _parse_time_string(self, time_str: str) -> datetime:
method _extract_author (line 219) | def _extract_author(self, soup: BeautifulSoup) -> Optional[str]:
FILE: backend/app/tools/nbd_crawler.py
class NbdCrawlerTool (line 16) | class NbdCrawlerTool(BaseCrawler):
method __init__ (line 26) | def __init__(self):
method crawl (line 32) | def crawl(self, start_page: int = 1, end_page: int = 1) -> List[NewsIt...
method _crawl_page (line 56) | def _crawl_page(self, page: int) -> List[NewsItem]:
method _extract_news_links (line 89) | def _extract_news_links(self, soup: BeautifulSoup) -> List[dict]:
method _extract_news_item (line 152) | def _extract_news_item(self, link_info: dict) -> Optional[NewsItem]:
method _extract_content (line 194) | def _extract_content(self, soup: BeautifulSoup) -> str:
method _extract_publish_time (line 249) | def _extract_publish_time(self, soup: BeautifulSoup) -> Optional[datet...
method _parse_time_string (line 261) | def _parse_time_string(self, time_str: str) -> datetime:
method _extract_author (line 281) | def _extract_author(self, soup: BeautifulSoup) -> Optional[str]:
FILE: backend/app/tools/netease163_crawler.py
class Netease163CrawlerTool (line 16) | class Netease163CrawlerTool(BaseCrawler):
method __init__ (line 26) | def __init__(self):
method crawl (line 32) | def crawl(self, start_page: int = 1, end_page: int = 1) -> List[NewsIt...
method _crawl_page (line 56) | def _crawl_page(self, page: int) -> List[NewsItem]:
method _extract_news_links (line 89) | def _extract_news_links(self, soup: BeautifulSoup) -> List[dict]:
method _extract_news_item (line 115) | def _extract_news_item(self, link_info: dict) -> Optional[NewsItem]:
method _extract_content (line 150) | def _extract_content(self, soup: BeautifulSoup) -> str:
method _extract_publish_time (line 173) | def _extract_publish_time(self, soup: BeautifulSoup) -> Optional[datet...
method _parse_time_string (line 185) | def _parse_time_string(self, time_str: str) -> datetime:
method _extract_author (line 205) | def _extract_author(self, soup: BeautifulSoup) -> Optional[str]:
FILE: backend/app/tools/search_engine_crawler.py
class SearchEngineCrawler (line 17) | class SearchEngineCrawler:
method __init__ (line 23) | def __init__(self):
method _fetch_url (line 40) | def _fetch_url(self, url: str, timeout: int = 10) -> Optional[str]:
method search_with_engine (line 73) | def search_with_engine(
method _build_search_query (line 122) | def _build_search_query(self, query: str, days: int) -> str:
method _parse_search_results (line 159) | def _parse_search_results(
method _is_news_url (line 202) | def _is_news_url(self, url: str) -> bool:
method _extract_source_from_url (line 211) | def _extract_source_from_url(self, url: str) -> str:
method search_stock_news (line 233) | def search_stock_news(
function create_search_engine_crawler (line 293) | def create_search_engine_crawler(mcp_server_path: Optional[str] = None) ...
FILE: backend/app/tools/sina_crawler.py
class SinaCrawlerTool (line 16) | class SinaCrawlerTool(BaseCrawler):
method __init__ (line 26) | def __init__(self):
method crawl (line 33) | def crawl(self, start_page: int = 1, end_page: int = 1) -> List[NewsIt...
method _crawl_page (line 57) | def _crawl_page(self, page: int) -> List[NewsItem]:
method _crawl_news_detail (line 108) | def _crawl_news_detail(self, url: str) -> Optional[NewsItem]:
method _extract_title (line 159) | def _extract_title(self, soup: BeautifulSoup) -> Optional[str]:
method _extract_meta (line 175) | def _extract_meta(self, soup: BeautifulSoup) -> tuple:
method _extract_date (line 191) | def _extract_date(self, soup: BeautifulSoup) -> Optional[datetime]:
method _parse_date (line 208) | def _parse_date(self, date_text: str) -> Optional[datetime]:
method _extract_stock_codes (line 230) | def _extract_stock_codes(self, soup: BeautifulSoup) -> List[str]:
method _extract_content (line 244) | def _extract_content(self, soup: BeautifulSoup) -> str:
method _is_noise_text (line 301) | def _is_noise_text(self, text: str) -> bool:
function create_sina_crawler (line 333) | def create_sina_crawler() -> SinaCrawlerTool:
FILE: backend/app/tools/tencent_crawler.py
class TencentCrawlerTool (line 17) | class TencentCrawlerTool(BaseCrawler):
method __init__ (line 28) | def __init__(self):
method crawl (line 34) | def crawl(self, start_page: int = 1, end_page: int = 1) -> List[NewsIt...
method _crawl_page (line 59) | def _crawl_page(self, page: int) -> List[NewsItem]:
method _fetch_api_news (line 126) | def _fetch_api_news(self, page: int = 0) -> List[dict]:
method _parse_api_news_item (line 202) | def _parse_api_news_item(self, news_data: dict) -> Optional[NewsItem]:
method _extract_news_links (line 267) | def _extract_news_links(self, soup: BeautifulSoup) -> List[dict]:
method _extract_news_item (line 331) | def _extract_news_item(self, link_info: dict) -> Optional[NewsItem]:
method _extract_content (line 379) | def _extract_content(self, soup: BeautifulSoup) -> str:
method _extract_publish_time (line 410) | def _extract_publish_time(self, soup: BeautifulSoup) -> Optional[datet...
method _parse_time_string (line 445) | def _parse_time_string(self, time_str: str) -> datetime:
method _extract_author (line 490) | def _extract_author(self, soup: BeautifulSoup) -> Optional[str]:
FILE: backend/app/tools/text_cleaner.py
class TextCleanerTool (line 16) | class TextCleanerTool(BaseTool):
method __init__ (line 34) | def __init__(self):
method _load_custom_dict (line 48) | def _load_custom_dict(self):
method clean_text (line 64) | def clean_text(self, text: str) -> str:
method tokenize (line 91) | def tokenize(self, text: str, remove_stopwords: bool = True) -> List[s...
method extract_keywords (line 124) | def extract_keywords(self, text: str, top_k: int = 10) -> List[str]:
method normalize_stock_code (line 144) | def normalize_stock_code(self, code: str) -> str:
method _setup_parameters (line 159) | def _setup_parameters(self):
method execute (line 164) | def execute(self, **kwargs) -> dict:
method aexecute (line 201) | async def aexecute(self, **kwargs) -> dict:
function create_text_cleaner (line 216) | def create_text_cleaner() -> TextCleanerTool:
FILE: backend/app/tools/yicai_crawler.py
class YicaiCrawlerTool (line 16) | class YicaiCrawlerTool(BaseCrawler):
method __init__ (line 26) | def __init__(self):
method crawl (line 32) | def crawl(self, start_page: int = 1, end_page: int = 1) -> List[NewsIt...
method _crawl_page (line 56) | def _crawl_page(self, page: int) -> List[NewsItem]:
method _extract_news_links (line 84) | def _extract_news_links(self, soup: BeautifulSoup) -> List[dict]:
method _extract_news_item (line 110) | def _extract_news_item(self, link_info: dict) -> Optional[NewsItem]:
method _extract_content (line 145) | def _extract_content(self, soup: BeautifulSoup) -> str:
method _extract_publish_time (line 168) | def _extract_publish_time(self, soup: BeautifulSoup) -> Optional[datet...
method _parse_time_string (line 182) | def _parse_time_string(self, time_str: str) -> datetime:
method _extract_author (line 202) | def _extract_author(self, soup: BeautifulSoup) -> Optional[str]:
FILE: backend/clear_news_data.py
function clear_all_news_data (line 24) | def clear_all_news_data():
FILE: backend/init_knowledge_graph.py
function init_knowledge_graph (line 18) | async def init_knowledge_graph():
FILE: backend/reset_database.py
function reset_database (line 16) | async def reset_database():
FILE: backend/tests/check_milvus_data.py
function main (line 15) | def main():
FILE: backend/tests/check_news_embedding_status.py
function main (line 17) | async def main():
FILE: backend/tests/financial/test_smoke_openbb_models.py
class TestNewsModels (line 17) | class TestNewsModels:
method test_news_query_params_basic (line 20) | def test_news_query_params_basic(self):
method test_news_query_params_validation (line 40) | def test_news_query_params_validation(self):
method test_news_data_basic (line 59) | def test_news_data_basic(self):
method test_news_data_with_sentiment (line 78) | def test_news_data_with_sentiment(self):
method test_news_data_generate_id (line 96) | def test_news_data_generate_id(self):
method test_news_data_to_legacy_dict (line 113) | def test_news_data_to_legacy_dict(self):
class TestStockModels (line 138) | class TestStockModels:
method test_stock_query_params_basic (line 141) | def test_stock_query_params_basic(self):
method test_stock_price_data_basic (line 164) | def test_stock_price_data_basic(self):
method test_stock_price_data_to_legacy_dict (line 182) | def test_stock_price_data_to_legacy_dict(self):
method test_kline_interval_enum (line 205) | def test_kline_interval_enum(self):
method test_adjust_type_enum (line 213) | def test_adjust_type_enum(self):
FILE: backend/tests/financial/test_smoke_openbb_provider.py
class TestBaseFetcherAbstraction (line 18) | class TestBaseFetcherAbstraction:
method test_fetcher_subclass_implementation (line 21) | def test_fetcher_subclass_implementation(self):
method test_fetcher_fetch_pipeline (line 60) | async def test_fetcher_fetch_pipeline(self):
class TestBaseProviderAbstraction (line 96) | class TestBaseProviderAbstraction:
method test_provider_subclass_implementation (line 99) | def test_provider_subclass_implementation(self):
class TestProviderRegistry (line 142) | class TestProviderRegistry:
method test_registry_singleton (line 145) | def test_registry_singleton(self):
method test_registry_register_and_list (line 153) | def test_registry_register_and_list(self):
method test_registry_get_fetcher_auto_fallback (line 188) | def test_registry_get_fetcher_auto_fallback(self):
method test_registry_get_fetcher_by_name (line 244) | def test_registry_get_fetcher_by_name(self):
class TestSinaProvider (line 285) | class TestSinaProvider:
method test_sina_provider_info (line 288) | def test_sina_provider_info(self):
method test_sina_provider_get_news_fetcher (line 298) | def test_sina_provider_get_news_fetcher(self):
method test_sina_news_fetcher_transform_query (line 309) | def test_sina_news_fetcher_transform_query(self):
FILE: backend/tests/financial/test_smoke_openbb_tools.py
class TestFinancialNewsTool (line 17) | class TestFinancialNewsTool:
method test_tool_instantiation (line 20) | def test_tool_instantiation(self):
method test_tool_has_required_methods (line 29) | def test_tool_has_required_methods(self):
method test_tool_returns_error_when_no_provider (line 41) | async def test_tool_returns_error_when_no_provider(self):
method test_tool_with_mocked_fetcher (line 57) | async def test_tool_with_mocked_fetcher(self):
class TestStockPriceTool (line 113) | class TestStockPriceTool:
method test_tool_instantiation (line 116) | def test_tool_instantiation(self):
method test_tool_returns_error_for_invalid_interval (line 126) | async def test_tool_returns_error_for_invalid_interval(self):
method test_tool_returns_error_when_no_provider (line 137) | async def test_tool_returns_error_when_no_provider(self):
class TestSetupDefaultProviders (line 151) | class TestSetupDefaultProviders:
method test_setup_registers_sina (line 154) | def test_setup_registers_sina(self):
method test_setup_idempotent (line 166) | def test_setup_idempotent(self):
FILE: backend/tests/manual_vectorize.py
function vectorize_news_manually (line 22) | async def vectorize_news_manually(news_id: int):
function vectorize_all_pending (line 120) | async def vectorize_all_pending():
function main_async (line 181) | async def main_async():
FILE: backend/tests/test_alpha_mining/test_integration_p2.py
class TestQuantitativeAgent (line 25) | class TestQuantitativeAgent:
method test_agent_import (line 28) | def test_agent_import(self):
method test_agent_init_without_llm (line 35) | def test_agent_init_without_llm(self):
method test_agent_lazy_init (line 47) | def test_agent_lazy_init(self):
method test_agent_mine_factors (line 66) | async def test_agent_mine_factors(self):
method test_agent_full_analysis (line 84) | async def test_agent_full_analysis(self):
method test_agent_with_mock_llm (line 106) | async def test_agent_with_mock_llm(self):
method test_agent_evaluate_factor (line 136) | def test_agent_evaluate_factor(self):
method test_agent_get_best_factors (line 151) | def test_agent_get_best_factors(self):
class TestAlphaMiningAPI (line 174) | class TestAlphaMiningAPI:
method test_api_module_import (line 177) | def test_api_module_import(self):
method test_api_routes_exist (line 184) | def test_api_routes_exist(self):
method test_client (line 198) | def test_client(self):
method test_get_operators (line 207) | def test_get_operators(self, test_client):
method test_get_factors_empty (line 220) | def test_get_factors_empty(self, test_client):
method test_evaluate_factor (line 232) | def test_evaluate_factor(self, test_client):
method test_generate_factors (line 246) | def test_generate_factors(self, test_client):
class TestFullWorkflow (line 266) | class TestFullWorkflow:
method test_end_to_end_factor_discovery (line 270) | async def test_end_to_end_factor_discovery(self):
method test_quantitative_agent_workflow (line 336) | async def test_quantitative_agent_workflow(self):
method test_api_and_agent_integration (line 371) | def test_api_and_agent_integration(self):
class TestPerformance (line 408) | class TestPerformance:
method test_generator_speed (line 411) | def test_generator_speed(self):
method test_vm_execution_speed (line 433) | def test_vm_execution_speed(self):
FILE: backend/tests/test_alpha_mining/test_smoke_p0.py
class TestConfig (line 38) | class TestConfig:
method test_default_config_exists (line 41) | def test_default_config_exists(self):
method test_config_device (line 46) | def test_config_device(self):
method test_config_features (line 52) | def test_config_features(self):
class TestOps (line 64) | class TestOps:
method sample_tensor (line 68) | def sample_tensor(self):
method test_ts_delay (line 72) | def test_ts_delay(self, sample_tensor):
method test_ts_delta (line 81) | def test_ts_delta(self, sample_tensor):
method test_ts_mean (line 89) | def test_ts_mean(self, sample_tensor):
method test_ts_std (line 96) | def test_ts_std(self, sample_tensor):
method test_ops_config_complete (line 103) | def test_ops_config_complete(self):
method test_all_ops_executable (line 111) | def test_all_ops_executable(self, sample_tensor):
class TestVocab (line 135) | class TestVocab:
method test_default_vocab_exists (line 138) | def test_default_vocab_exists(self):
method test_vocab_token_mapping (line 143) | def test_vocab_token_mapping(self):
method test_vocab_is_feature_operator (line 156) | def test_vocab_is_feature_operator(self):
method test_vocab_get_operator_arity (line 169) | def test_vocab_get_operator_arity(self):
class TestFactorVM (line 182) | class TestFactorVM:
method vm (line 186) | def vm(self):
method features (line 191) | def features(self):
method test_vm_execute_simple (line 196) | def test_vm_execute_simple(self, vm, features):
method test_vm_execute_binary_op (line 206) | def test_vm_execute_binary_op(self, vm, features):
method test_vm_execute_unary_op (line 219) | def test_vm_execute_unary_op(self, vm, features):
method test_vm_execute_invalid_formula (line 232) | def test_vm_execute_invalid_formula(self, vm, features):
method test_vm_decode_simple (line 243) | def test_vm_decode_simple(self, vm):
method test_vm_validate (line 255) | def test_vm_validate(self, vm):
class TestAlphaGenerator (line 275) | class TestAlphaGenerator:
method generator (line 279) | def generator(self):
method test_generator_init (line 284) | def test_generator_init(self, generator):
method test_generator_forward (line 289) | def test_generator_forward(self, generator):
method test_generator_generate (line 300) | def test_generator_generate(self, generator):
method test_generator_generate_with_training (line 314) | def test_generator_generate_with_training(self, generator):
class TestAlphaTrainer (line 333) | class TestAlphaTrainer:
method trainer (line 337) | def trainer(self):
method mock_data (line 348) | def mock_data(self):
method test_trainer_init (line 357) | def test_trainer_init(self, trainer):
method test_trainer_train_step (line 363) | def test_trainer_train_step(self, trainer, mock_data):
method test_trainer_short_training (line 374) | def test_trainer_short_training(self, trainer, mock_data):
class TestMockData (line 393) | class TestMockData:
method test_generate_mock_data_shape (line 396) | def test_generate_mock_data_shape(self):
method test_generate_mock_data_no_nan (line 407) | def test_generate_mock_data_no_nan(self):
method test_generate_mock_data_reproducible (line 414) | def test_generate_mock_data_reproducible(self):
class TestEndToEnd (line 427) | class TestEndToEnd:
method test_full_pipeline_smoke (line 430) | def test_full_pipeline_smoke(self):
FILE: backend/tests/test_alpha_mining/test_smoke_p1.py
class TestMarketFeatureBuilder (line 34) | class TestMarketFeatureBuilder:
method builder (line 38) | def builder(self):
method sample_df (line 42) | def sample_df(self):
method test_build_from_dataframe (line 54) | def test_build_from_dataframe(self, builder, sample_df):
method test_build_from_tensors (line 63) | def test_build_from_tensors(self, builder):
method test_features_normalized (line 74) | def test_features_normalized(self, builder, sample_df):
method test_no_nan_in_features (line 82) | def test_no_nan_in_features(self, builder, sample_df):
method test_feature_names (line 89) | def test_feature_names(self, builder):
class TestSentimentFeatureBuilder (line 103) | class TestSentimentFeatureBuilder:
method builder (line 107) | def builder(self):
method sample_df (line 111) | def sample_df(self):
method test_build_from_dataframe (line 122) | def test_build_from_dataframe(self, builder, sample_df):
method test_build_from_dict (line 131) | def test_build_from_dict(self, builder):
method test_build_from_list (line 142) | def test_build_from_list(self, builder):
method test_time_alignment (line 154) | def test_time_alignment(self, builder):
method test_sentiment_decay (line 162) | def test_sentiment_decay(self, builder):
method test_combine_with_market (line 173) | def test_combine_with_market(self, builder):
class TestFactorEvaluator (line 187) | class TestFactorEvaluator:
method evaluator (line 191) | def evaluator(self):
method sample_data (line 195) | def sample_data(self):
method test_evaluate_basic (line 209) | def test_evaluate_basic(self, evaluator, sample_data):
method test_evaluate_batch (line 222) | def test_evaluate_batch(self, evaluator):
method test_get_reward (line 233) | def test_get_reward(self, evaluator, sample_data):
method test_good_factor_high_ic (line 242) | def test_good_factor_high_ic(self, evaluator):
method test_random_factor_low_ic (line 253) | def test_random_factor_low_ic(self, evaluator):
method test_compare_factors (line 263) | def test_compare_factors(self, evaluator):
method test_rank_factors (line 281) | def test_rank_factors(self, evaluator):
class TestAlphaMiningToolImport (line 299) | class TestAlphaMiningToolImport:
method test_import_tool (line 302) | def test_import_tool(self):
method test_tool_metadata (line 311) | def test_tool_metadata(self):
class TestP1EndToEnd (line 329) | class TestP1EndToEnd:
method test_full_pipeline_with_real_features (line 332) | def test_full_pipeline_with_real_features(self):
FILE: backend/tests/test_smoke_alpha_mining.py
class TestDSLOperators (line 24) | class TestDSLOperators:
method test_ops_config_exists (line 27) | def test_ops_config_exists(self):
method test_arithmetic_ops (line 41) | def test_arithmetic_ops(self):
method test_timeseries_ops (line 65) | def test_timeseries_ops(self):
class TestFactorVM (line 85) | class TestFactorVM:
method vm (line 89) | def vm(self):
method sample_features (line 95) | def sample_features(self):
method test_execute_simple_formula (line 99) | def test_execute_simple_formula(self, vm, sample_features):
method test_execute_invalid_formula (line 108) | def test_execute_invalid_formula(self, vm, sample_features):
method test_decode_formula (line 121) | def test_decode_formula(self, vm):
class TestAlphaGenerator (line 129) | class TestAlphaGenerator:
method generator (line 133) | def generator(self):
method test_generate_batch (line 141) | def test_generate_batch(self, generator):
method test_generate_with_training (line 150) | def test_generate_with_training(self, generator):
class TestAlphaTrainer (line 160) | class TestAlphaTrainer:
method trainer (line 164) | def trainer(self):
method sample_data (line 173) | def sample_data(self):
method test_train_step (line 179) | def test_train_step(self, trainer, sample_data):
method test_train_with_callback (line 191) | def test_train_with_callback(self, trainer, sample_data):
class TestFactorEvaluator (line 212) | class TestFactorEvaluator:
method evaluator (line 216) | def evaluator(self):
method test_evaluate_factor (line 220) | def test_evaluate_factor(self, evaluator):
method test_get_reward (line 235) | def test_get_reward(self, evaluator):
class TestVocab (line 245) | class TestVocab:
method test_vocab_initialization (line 248) | def test_vocab_initialization(self):
method test_token_conversion (line 258) | def test_token_conversion(self):
class TestAPIEndpoints (line 275) | class TestAPIEndpoints:
method client (line 279) | def client(self):
method test_get_operators (line 288) | def test_get_operators(self, client):
method test_get_factors_empty (line 299) | def test_get_factors_empty(self, client):
method test_evaluate_factor (line 308) | def test_evaluate_factor(self, client):
method test_mine_task_start (line 320) | def test_mine_task_start(self, client):
class TestEdgeCases (line 333) | class TestEdgeCases:
method test_empty_formula (line 336) | def test_empty_formula(self):
method test_constant_factor_penalty (line 347) | def test_constant_factor_penalty(self):
method test_nan_handling (line 359) | def test_nan_handling(self):
FILE: frontend/src/App.tsx
function App (line 12) | function App() {
FILE: frontend/src/components/DebateChatRoom.tsx
type ChatRole (line 18) | type ChatRole = 'user' | 'bull' | 'bear' | 'manager' | 'system' | 'data_...
type SearchTask (line 21) | interface SearchTask {
type SearchPlan (line 30) | interface SearchPlan {
type ChatMessage (line 40) | interface ChatMessage {
type DebateChatRoomProps (line 118) | interface DebateChatRoomProps {
FILE: frontend/src/components/DebateConfig.tsx
type DebateMode (line 21) | interface DebateMode {
type ModeRules (line 30) | interface ModeRules {
constant DEFAULT_RULES (line 61) | const DEFAULT_RULES: Record<string, ModeRules> = {
type DebateConfigProps (line 82) | interface DebateConfigProps {
FILE: frontend/src/components/DebateHistorySidebar.tsx
type DebateHistorySidebarProps (line 20) | interface DebateHistorySidebarProps {
FILE: frontend/src/components/HighlightText.tsx
type HighlightTextProps (line 3) | interface HighlightTextProps {
function HighlightText (line 25) | function HighlightText({ text, highlight, className = '' }: HighlightTex...
FILE: frontend/src/components/KLineChart.tsx
type KLineChartProps (line 64) | interface KLineChartProps {
function KLineChart (line 75) | function KLineChart({
function MiniKLineChart (line 424) | function MiniKLineChart({
FILE: frontend/src/components/MentionInput.tsx
type MentionType (line 18) | type MentionType = 'agent' | 'source' | 'stock'
type MentionTarget (line 20) | interface MentionTarget {
constant AGENTS (line 30) | const AGENTS: MentionTarget[] = [
constant SOURCES (line 66) | const SOURCES: MentionTarget[] = [
constant ALL_TARGETS (line 102) | const ALL_TARGETS = [...AGENTS, ...SOURCES]
type MentionInputProps (line 104) | interface MentionInputProps {
FILE: frontend/src/components/ModelSelector.tsx
type ModelConfig (line 18) | interface ModelConfig {
constant PROVIDER_I18N (line 24) | const PROVIDER_I18N: Record<string, { labelZh: string; labelEn: string }...
constant MODEL_DESCRIPTION_I18N (line 47) | const MODEL_DESCRIPTION_I18N: Record<string, { descZh: string; descEn: s...
constant DEFAULT_CONFIG (line 70) | const DEFAULT_CONFIG: ModelConfig = {
function ModelSelector (line 75) | function ModelSelector() {
function useModelConfig (line 253) | function useModelConfig() {
FILE: frontend/src/components/NewsDetailDrawer.tsx
constant NEWS_SOURCES (line 34) | const NEWS_SOURCES = [
type NewsDetailDrawerProps (line 48) | interface NewsDetailDrawerProps {
function NewsDetailDrawer (line 54) | function NewsDetailDrawer({
FILE: frontend/src/components/StockSearch.tsx
type StockSearchProps (line 13) | interface StockSearchProps {
function StockSearch (line 19) | function StockSearch({
FILE: frontend/src/components/alpha-mining/AgentDemo.tsx
type AgentDemoResult (line 21) | interface AgentDemoResult {
type AgentDemoProps (line 31) | interface AgentDemoProps {
type FlowNodeProps (line 356) | interface FlowNodeProps {
FILE: frontend/src/components/alpha-mining/MetricsDashboard.tsx
type FactorMetrics (line 24) | interface FactorMetrics {
type MetricsDashboardProps (line 36) | interface MetricsDashboardProps {
type MetricCardProps (line 394) | interface MetricCardProps {
function normalizeMetric (line 434) | function normalizeMetric(value: number, min: number, max: number): number {
function getFactorRating (line 440) | function getFactorRating(metrics: FactorMetrics, t: any): {
FILE: frontend/src/components/alpha-mining/OperatorGrid.tsx
type OperatorCategory (line 21) | type OperatorCategory = 'arithmetic' | 'unary' | 'timeseries' | 'conditi...
type Operator (line 23) | interface Operator {
type IconComponent (line 33) | type IconComponent = React.ComponentType<{ className?: string }>;
constant OPERATOR_ICON_COMPONENTS (line 36) | const OPERATOR_ICON_COMPONENTS: Record<string, IconComponent> = {
constant FEATURES (line 101) | const FEATURES = ['RET', 'VOL', 'VOLUME_CHG', 'TURNOVER', 'SENTIMENT', '...
type OperatorGridProps (line 112) | interface OperatorGridProps {
type OperatorCardProps (line 236) | interface OperatorCardProps {
FILE: frontend/src/components/alpha-mining/SentimentCompare.tsx
type CompareResult (line 24) | interface CompareResult {
type SentimentCompareProps (line 31) | interface SentimentCompareProps {
FILE: frontend/src/components/alpha-mining/TrainingMonitor.tsx
type TrainingMetrics (line 24) | interface TrainingMetrics {
type TrainingMonitorProps (line 35) | interface TrainingMonitorProps {
type TrainingStatus (line 40) | type TrainingStatus = 'idle' | 'running' | 'completed' | 'error';
type MetricCardProps (line 385) | interface MetricCardProps {
FILE: frontend/src/components/ui/badge.tsx
type BadgeProps (line 28) | interface BadgeProps
function Badge (line 32) | function Badge({ className, variant, ...props }: BadgeProps) {
FILE: frontend/src/components/ui/button.tsx
type ButtonProps (line 36) | interface ButtonProps
FILE: frontend/src/components/ui/sheet.tsx
type SheetContextValue (line 5) | interface SheetContextValue {
type SheetContentProps (line 70) | interface SheetContentProps extends React.HTMLAttributes<HTMLDivElement> {
FILE: frontend/src/context/NewsToolbarContext.tsx
type ToolbarContent (line 3) | interface ToolbarContent {
type NewsToolbarContextValue (line 8) | interface NewsToolbarContextValue {
FILE: frontend/src/hooks/useDebounce.ts
function useDebounce (line 21) | function useDebounce<T>(value: T, delay: number = 500): T {
FILE: frontend/src/layout/MainLayout.tsx
function MainLayout (line 17) | function MainLayout() {
function MainLayoutInner (line 25) | function MainLayoutInner() {
FILE: frontend/src/lib/api-client.ts
constant API_BASE_URL (line 23) | const API_BASE_URL = import.meta.env.VITE_API_BASE_URL || 'http://localh...
type ModelInfo (line 230) | interface ModelInfo {
type ProviderInfo (line 236) | interface ProviderInfo {
type LLMConfigResponse (line 244) | interface LLMConfigResponse {
type SSEDebateEvent (line 566) | interface SSEDebateEvent {
type AlphaMiningFactor (line 904) | interface AlphaMiningFactor {
type AlphaMiningMetrics (line 914) | interface AlphaMiningMetrics {
type MineRequest (line 926) | interface MineRequest {
type EvaluateRequest (line 933) | interface EvaluateRequest {
type SentimentCompareResult (line 938) | interface SentimentCompareResult {
type OperatorInfo (line 945) | interface OperatorInfo {
FILE: frontend/src/lib/utils.ts
function cn (line 4) | function cn(...inputs: ClassValue[]) {
function formatDate (line 8) | function formatDate(date: string | Date): string {
type TimeI18n (line 19) | interface TimeI18n {
function formatRelativeTime (line 33) | function formatRelativeTime(date: string | Date, i18n?: TimeI18n): string {
FILE: frontend/src/pages/AgentMonitorPage.tsx
constant AGENT_ROLES (line 32) | const AGENT_ROLES: Record<string, { roleZh: string; roleEn: string; desc...
constant WORKFLOW_DESCRIPTIONS (line 66) | const WORKFLOW_DESCRIPTIONS: Record<string, { descZh: string; descEn: st...
function formatTimestamp (line 97) | function formatTimestamp(timestamp: string, locale: string = 'zh-CN'): s...
function AgentMonitorPage (line 108) | function AgentMonitorPage() {
FILE: frontend/src/pages/AlphaMiningPage.tsx
type FactorCardProps (line 427) | interface FactorCardProps {
FILE: frontend/src/pages/Dashboard.tsx
constant NEWS_SOURCES (line 14) | const NEWS_SOURCES = [
constant SOURCE_NAME_TO_KEY (line 29) | const SOURCE_NAME_TO_KEY: Record<string, string> = {
constant EXTENDED_NEWS_SOURCES (line 54) | const EXTENDED_NEWS_SOURCES: Record<string, { nameZh: string; nameEn: st...
function Dashboard (line 66) | function Dashboard() {
FILE: frontend/src/pages/NewsListPage.tsx
type FilterType (line 17) | type FilterType = 'all' | 'pending' | 'positive' | 'negative' | 'neutral'
function SearchBox (line 20) | function SearchBox({ onSearch }: { onSearch: (query: string) => void }) {
constant NEWS_SOURCES (line 73) | const NEWS_SOURCES = [
constant SOURCE_NAME_TO_KEY (line 88) | const SOURCE_NAME_TO_KEY: Record<string, string> = {
constant EXTENDED_NEWS_SOURCES (line 113) | const EXTENDED_NEWS_SOURCES: Record<string, { nameZh: string; nameEn: st...
function NewsListPage (line 125) | function NewsListPage() {
FILE: frontend/src/pages/StockAnalysisPage.tsx
type KLinePeriod (line 73) | type KLinePeriod = 'daily' | '1m' | '5m' | '15m' | '30m' | '60m'
type KLineAdjust (line 84) | type KLineAdjust = 'qfq' | 'hfq' | ''
type CrawlTaskStatus (line 92) | type CrawlTaskStatus = 'idle' | 'pending' | 'running' | 'completed' | 'f...
type CrawlTaskState (line 94) | interface CrawlTaskState {
function StockAnalysisPage (line 105) | function StockAnalysisPage() {
FILE: frontend/src/pages/StockSearchPage.tsx
function StockSearchPage (line 23) | function StockSearchPage() {
FILE: frontend/src/pages/TaskManagerPage.tsx
function TaskManagerPage (line 8) | function TaskManagerPage() {
FILE: frontend/src/store/useDebateStore.ts
type ChatRole (line 5) | type ChatRole = 'user' | 'bull' | 'bear' | 'manager' | 'system' | 'data_...
type ChatMessage (line 7) | interface ChatMessage {
type AnalysisResult (line 20) | interface AnalysisResult {
type DebateSession (line 33) | interface DebateSession {
type SerializedSession (line 48) | interface SerializedSession {
type DebateStore (line 58) | interface DebateStore {
FILE: frontend/src/store/useLanguageStore.ts
type Lang (line 8) | type Lang = 'zh' | 'en';
type LanguageState (line 10) | interface LanguageState {
FILE: frontend/src/store/useNewsStore.ts
type NewsStore (line 4) | interface NewsStore {
FILE: frontend/src/store/useTaskStore.ts
type TaskStore (line 4) | interface TaskStore {
FILE: frontend/src/types/api.ts
type News (line 6) | interface News {
type Analysis (line 20) | interface Analysis {
type CrawlTask (line 34) | interface CrawlTask {
type TaskStats (line 58) | interface TaskStats {
type CrawlRequest (line 67) | interface CrawlRequest {
type CrawlResponse (line 73) | interface CrawlResponse {
type AnalysisResponse (line 81) | interface AnalysisResponse {
type StockOverview (line 95) | interface StockOverview {
type StockNewsItem (line 106) | interface StockNewsItem {
type SentimentTrendPoint (line 117) | interface SentimentTrendPoint {
type KLineDataPoint (line 126) | interface KLineDataPoint {
type RealtimeQuote (line 141) | interface RealtimeQuote {
type DebateRequest (line 157) | interface DebateRequest {
type AgentAnalysis (line 167) | interface AgentAnalysis {
type FinalDecision (line 177) | interface FinalDecision {
type TrajectoryStep (line 187) | interface TrajectoryStep {
type QuickAnalysisResult (line 193) | interface QuickAnalysisResult {
type DebateHistoryItem (line 200) | interface DebateHistoryItem {
type DebateResponse (line 207) | interface DebateResponse {
type AgentLogEntry (line 225) | interface AgentLogEntry {
type AgentMetrics (line 236) | interface AgentMetrics {
type AgentInfo (line 255) | interface AgentInfo {
type WorkflowInfo (line 262) | interface WorkflowInfo {
FILE: legacy_v1/Crawler/crawler_cnstock.py
class WebCrawlFromcnstock (line 19) | class WebCrawlFromcnstock(object):
method __init__ (line 35) | def __init__(self,**kwarg):
method ConnDB (line 45) | def ConnDB(self):
method countchn (line 52) | def countchn(self,string):
method getUrlInfo (line 64) | def getUrlInfo(self,url):
method GenPagesLst (line 95) | def GenPagesLst(self,totalPages,Range,initPageID):
method CrawlHistoryCompanyNews (line 107) | def CrawlHistoryCompanyNews(self,startPage,endPage,url_Part_1):
method CrawlRealtimeCompanyNews (line 164) | def CrawlRealtimeCompanyNews(self,url_part_lst):
method extractData (line 225) | def extractData(self,tag_list):
method coroutine_run (line 234) | def coroutine_run(self,totalPages,Range,initPageID,**kwarg):
method multi_threads_run (line 243) | def multi_threads_run(self,**kwarg):
method classifyRealtimeStockNews (line 252) | def classifyRealtimeStockNews(self):
FILE: legacy_v1/Crawler/crawler_jrj.py
class WebCrawlFromjrj (line 20) | class WebCrawlFromjrj(object):
method __init__ (line 34) | def __init__(self,*arg,**kwarg):
method getEveryDay (line 47) | def getEveryDay(self,begin_date,end_date):
method countchn (line 59) | def countchn(self,string):
method getUrlInfo (line 71) | def getUrlInfo(self,url,specificDate):
method GenDatesLst (line 116) | def GenDatesLst(self):
method findPagesOfSpecificDate (line 131) | def findPagesOfSpecificDate(self,firstUrl,date):
method CrawlRealtimeCompanyNews (line 149) | def CrawlRealtimeCompanyNews(self,today_Date):
method CrawlHistoryCompanyNews (line 224) | def CrawlHistoryCompanyNews(self,datelst):
method ConnDB (line 291) | def ConnDB(self):
method extractData (line 298) | def extractData(self,tag_list):
method StockCodeDuplicateRemoval (line 307) | def StockCodeDuplicateRemoval(self):
method coroutine_run (line 330) | def coroutine_run(self):
method multi_threads_run (line 339) | def multi_threads_run(self,**kwarg):
method classifyRealtimeStockNews (line 348) | def classifyRealtimeStockNews(self):
FILE: legacy_v1/Crawler/crawler_nbd.py
class WebCrawlFromNBD (line 17) | class WebCrawlFromNBD(object):
method __init__ (line 32) | def __init__(self,*arg,**kwarg):
method countchn (line 46) | def countchn(self,string):
method getUrlInfo (line 58) | def getUrlInfo(self,url):
method GenPagesLst (line 95) | def GenPagesLst(self):
method ReCrawlNews (line 107) | def ReCrawlNews(self,url_list):
method ReCrawlArticles (line 150) | def ReCrawlArticles(self,url_list,title_list):
method CrawlCompanyNews (line 179) | def CrawlCompanyNews(self,startPage,endPage):
method ConnDB (line 244) | def ConnDB(self):
method extractData (line 251) | def extractData(self,tag_list):
method single_run (line 260) | def single_run(self):
method multi_threads_run (line 268) | def multi_threads_run(self):
method coroutine_run (line 283) | def coroutine_run(self):
FILE: legacy_v1/Crawler/crawler_sina.py
class WebCrawlFromSina (line 19) | class WebCrawlFromSina(object):
method __init__ (line 33) | def __init__(self,*arg,**kwarg):
method countchn (line 45) | def countchn(self,string):
method getUrlInfo (line 57) | def getUrlInfo(self,url):
method GenPagesLst (line 131) | def GenPagesLst(self):
method CrawlRealtimeCompanyNews (line 143) | def CrawlRealtimeCompanyNews(self,firstPage):
method CrawlHistoryCompanyNews (line 206) | def CrawlHistoryCompanyNews(self,startPage,endPage):
method ConnDB (line 271) | def ConnDB(self):
method extractData (line 278) | def extractData(self,tag_list):
method single_run (line 287) | def single_run(self):
method coroutine_run (line 294) | def coroutine_run(self):
method multi_threads_run (line 303) | def multi_threads_run(self,**kwarg):
method classifyRealtimeStockNews (line 312) | def classifyRealtimeStockNews(self):
FILE: legacy_v1/Crawler/crawler_stcn.py
class WebCrawlFromstcn (line 19) | class WebCrawlFromstcn(object):
method __init__ (line 37) | def __init__(self,**kwarg):
method countchn (line 47) | def countchn(self,string):
method getUrlInfo (line 59) | def getUrlInfo(self,url):
method GenPagesLst (line 90) | def GenPagesLst(self,totalPages,Range,initPageID):
method CrawlRealtimeCompanyNews (line 102) | def CrawlRealtimeCompanyNews(self,url_part_lst):
method CrawlCompanyNews (line 163) | def CrawlCompanyNews(self,startPage,endPage,url_Part_1):
method ConnDB (line 220) | def ConnDB(self):
method extractData (line 227) | def extractData(self,tag_list):
method coroutine_run (line 236) | def coroutine_run(self,totalPages,Range,initPageID,**kwarg):
method multi_threads_run (line 245) | def multi_threads_run(self,**kwarg):
method classifyRealtimeStockNews (line 254) | def classifyRealtimeStockNews(self):
FILE: legacy_v1/Crawler/crawler_tushare.py
class CrawlStockData (line 8) | class CrawlStockData(object):
method __init__ (line 9) | def __init__(self,**kwarg):
method ConnDB (line 15) | def ConnDB(self):
method extractData (line 18) | def extractData(self,dbName,colName,tag_list):
method getStockBasicFromTushare (line 27) | def getStockBasicFromTushare(self,dbName,colName):
method renewStockBasic (line 57) | def renewStockBasic(self):
method getStockTickHistory (line 60) | def getStockTickHistory(self,dbName,stockCode):
method getStockDayHistory (line 85) | def getStockDayHistory(self,dbName,stockCode):
method getCalendar (line 106) | def getCalendar(self,begin_date):
method isUnique (line 116) | def isUnique(self, List):
method getStockTickRealtime (line 125) | def getStockTickRealtime(self):
FILE: legacy_v1/Text_Analysis/text_mining.py
class TextMining (line 30) | class TextMining(object):
method __init__ (line 38) | def __init__(self,**kwarg):
method ConnDB (line 48) | def ConnDB(self):
method extractData (line 53) | def extractData(self,dbName,colName,tag_list):
method extractStockCodeFromArticle (line 72) | def extractStockCodeFromArticle(self,dbName,colName):
method extractStockCodeFromRealtimeNews (line 107) | def extractStockCodeFromRealtimeNews(self,documents):
method judgeGoodOrBadNews (line 125) | def judgeGoodOrBadNews(self,stockCode,date,judgeTerm):
method getNewsOfSpecificStock (line 152) | def getNewsOfSpecificStock(self,dbColLst,stockCode,**kwarg):
method classifyHistoryStockNews (line 252) | def classifyHistoryStockNews(self,dbName,stockCode,**kwarg):
method classifyRealtimeStockNews (line 330) | def classifyRealtimeStockNews(self,doc_list):
method SVMClassifier (line 386) | def SVMClassifier(self,train_X,train_Y,test_X,test_Y,tuned_parameters,...
method RdForestClassifier (line 424) | def RdForestClassifier(self,train_X,train_Y,test_X,test_Y,tuned_parame...
method ConvertToCSRMatrix (line 461) | def ConvertToCSRMatrix(self,modelVec):
method genTrainingSet (line 481) | def genTrainingSet(self,X,Y):
FILE: legacy_v1/Text_Analysis/text_processing.py
class TextProcessing (line 14) | class TextProcessing(object):
method __init__ (line 22) | def __init__(self,chnSTWPath,finance_dict):
method renewFinanceDict (line 26) | def renewFinanceDict(self,new_Word_list):
method getchnSTW (line 37) | def getchnSTW(self):
method jieba_tokenize (line 43) | def jieba_tokenize(self,documents):
method RemoveWordAppearOnce (line 62) | def RemoveWordAppearOnce(self,corpora_documents):
method genDictionary (line 75) | def genDictionary(self,documents,**kwarg):
method CallTransformationModel (line 96) | def CallTransformationModel(self,Dict,Bowvec,**kwarg):
method CalSim (line 155) | def CalSim(self,test_document,Type,best_num):
method PrintWorfCloud (line 191) | def PrintWorfCloud(self,documents,backgroundImgPath,fontPath):
FILE: legacy_v1/run_main.py
function crawlers (line 11) | def crawlers(web):
FILE: legacy_v1/src/Gon/__init__.py
function add_path (line 5) | def add_path(path):
FILE: legacy_v1/src/Gon/cnstockspyder.py
class CnStockSpyder (line 37) | class CnStockSpyder(Spyder):
method __init__ (line 39) | def __init__(self, database_name, collection_name):
method get_url_info (line 51) | def get_url_info(self, url):
method get_historical_news (line 78) | def get_historical_news(self, url, category_chn=None, start_date=None):
method get_realtime_news (line 256) | def get_realtime_news(self, url, category_chn=None, interval=60):
FILE: legacy_v1/src/Gon/jrjspyder.py
class JrjSpyder (line 27) | class JrjSpyder(Spyder):
method __init__ (line 29) | def __init__(self, database_name, collection_name):
method get_url_info (line 41) | def get_url_info(self, url, specific_date):
method get_historical_news (line 63) | def get_historical_news(self, url, start_date=None, end_date=None):
method get_realtime_news (line 168) | def get_realtime_news(self, interval=60):
FILE: legacy_v1/src/Gon/kill_realtime_spyder_tasks.py
class KillPyTasks (line 15) | class KillPyTasks(object):
method __init__ (line 17) | def __init__(self):
method killtask (line 30) | def killtask(pid):
method get_python_process (line 34) | def get_python_process(prop="python.exe", param=None):
method print_pid_info (line 46) | def print_pid_info(process):
FILE: legacy_v1/src/Gon/nbdspyder.py
class NbdSpyder (line 27) | class NbdSpyder(Spyder):
method __init__ (line 29) | def __init__(self, database_name, collection_name):
method get_url_info (line 41) | def get_url_info(self, url):
method get_historical_news (line 73) | def get_historical_news(self, start_page=684):
method get_realtime_news (line 217) | def get_realtime_news(self, interval=60):
FILE: legacy_v1/src/Gon/spyder.py
class Spyder (line 1) | class Spyder(object):
method __init__ (line 3) | def __init__(self):
method extract_data (line 6) | def extract_data(self, tag_list):
method query_news (line 14) | def query_news(self, _key, param):
method get_url_info (line 18) | def get_url_info(self, url):
method get_historical_news (line 21) | def get_historical_news(self, url):
method get_realtime_news (line 24) | def get_realtime_news(self, url):
FILE: legacy_v1/src/Gon/stockinfospyder.py
class StockInfoSpyder (line 29) | class StockInfoSpyder(Spyder):
method __init__ (line 31) | def __init__(self, database_name, collection_name):
method get_stock_code_info (line 43) | def get_stock_code_info(self):
method get_historical_news (line 54) | def get_historical_news(self, start_date=None, end_date=None, freq="da...
method get_realtime_news (line 107) | def get_realtime_news(self, freq="day"):
FILE: legacy_v1/src/Hisoka/classifier.py
class Classifier (line 24) | class Classifier(object):
method __init__ (line 26) | def __init__(self):
method train (line 29) | def train(self, train_x, train_y, test_x, test_y, model_type="svm", mo...
method model_load (line 76) | def model_load(classifier_save_path):
FILE: legacy_v1/src/Killua/__init__.py
function add_path (line 5) | def add_path(path):
FILE: legacy_v1/src/Killua/buildstocknewsdb.py
class GenStockNewsDB (line 20) | class GenStockNewsDB(object):
method __init__ (line 22) | def __init__(self):
method get_all_news_about_specific_stock (line 39) | def get_all_news_about_specific_stock(self, database_name, collection_...
method listen_redis_queue (line 78) | def listen_redis_queue(self):
method _label_news (line 138) | def _label_news(self, date, symbol, n_days):
method _stock_news_nums_stat (line 209) | def _stock_news_nums_stat(self):
FILE: legacy_v1/src/Killua/deduplication.py
class Deduplication (line 12) | class Deduplication(object):
method __init__ (line 14) | def __init__(self, database_name, collection_name):
method run (line 20) | def run(self):
FILE: legacy_v1/src/Killua/denull.py
class DeNull (line 15) | class DeNull(object):
method __init__ (line 17) | def __init__(self, database_name, collection_name):
method run (line 23) | def run(self):
FILE: legacy_v1/src/Kite/__init__.py
function add_path (line 5) | def add_path(path):
FILE: legacy_v1/src/Kite/database.py
class Database (line 5) | class Database(object):
method __init__ (line 7) | def __init__(self, ip="localhost", port=27017):
method connect_database (line 12) | def connect_database(self, database_name):
method get_collection (line 15) | def get_collection(self, database_name, collection_name):
method insert_data (line 18) | def insert_data(self, database_name, collection_name, data_dict):
method update_row (line 23) | def update_row(self, database_name, collection_name, query, new_values):
method get_data (line 30) | def get_data(self, database_name, collection_name, max_data_request=No...
method drop_db (line 74) | def drop_db(self, database):
FILE: legacy_v1/src/Kite/utils.py
function generate_pages_list (line 9) | def generate_pages_list(total_pages, range, init_page_id):
function count_chn (line 23) | def count_chn(string):
function get_date_list_from_range (line 37) | def get_date_list_from_range(begin_date, end_date):
function gen_dates_list (line 51) | def gen_dates_list(date_list, date_range):
function get_date_before (line 65) | def get_date_before(n_days):
function search_max_pages_num (line 78) | def search_max_pages_num(first_url, date):
function html_parser (line 102) | def html_parser(url):
function get_chn_stop_words (line 110) | def get_chn_stop_words(path):
function convert_to_csr_matrix (line 118) | def convert_to_csr_matrix(model_vector):
function generate_training_set (line 141) | def generate_training_set(x, y, split=0.8):
function is_contain_chn (line 157) | def is_contain_chn(word):
function batch_lpop (line 170) | def batch_lpop(client, key, n):
FILE: legacy_v1/src/Leorio/__init__.py
function add_path (line 5) | def add_path(path):
FILE: legacy_v1/src/Leorio/tokenization.py
class Tokenization (line 16) | class Tokenization(object):
method __init__ (line 18) | def __init__(self, import_module="jieba", user_dict=None, chn_stop_wor...
method update_user_dict (line 30) | def update_user_dict(self, old_user_dict_dir, new_user_dict_dir=None):
method cut_words (line 48) | def cut_words(self, text):
method find_relevant_stock_codes_in_article (line 70) | def find_relevant_stock_codes_in_article(self, article, stock_name_cod...
method update_news_database_rows (line 81) | def update_news_database_rows(self,
FILE: legacy_v1/src/Leorio/topicmodelling.py
class TopicModelling (line 24) | class TopicModelling(object):
method __init__ (line 26) | def __init__(self):
method create_dictionary (line 33) | def create_dictionary(self,
method renew_dictionary (line 65) | def renew_dictionary(self,
method create_bag_of_word_representation (line 83) | def create_bag_of_word_representation(self,
method transform_vectorized_corpus (line 113) | def transform_vectorized_corpus(corpora_dictionary,
method classify_stock_news (line 147) | def classify_stock_news(self,
Condensed preview — 293 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (2,974K chars).
[
{
"path": ".deepsource.toml",
"chars": 90,
"preview": "version = 1\n\n[[analyzers]]\nname = \"python\"\n\n [analyzers.meta]\n runtime_version = \"3.x.x\""
},
{
"path": ".gitignore",
"chars": 372,
"preview": "# Development documentation (local only, not for Git)\ndevlogs/\nconclusions/\nresearches/\n\n# Python\n__pycache__/\n*.py[cod]"
},
{
"path": "LICENSE",
"chars": 11339,
"preview": "\n Apache License\n Version 2.0, January 2004\n "
},
{
"path": "README.md",
"chars": 42439,
"preview": "# FinnewsHunter: Multi-Agent Investment Decision Platform Driven by Financial News\n\n<div align=\"right\">\n <a href=\"READM"
},
{
"path": "README_zn.md",
"chars": 28928,
"preview": "# FinnewsHunter:金融新闻驱动的多智能体投资决策平台\n\n<div align=\"right\">\n <a href=\"README_zn.md\">中文版</a> | <a href=\"README.md\">English</a"
},
{
"path": "backend/.gitignore",
"chars": 452,
"preview": "# Python\n__pycache__/\n*.py[cod]\n*$py.class\n*.so\n.Python\nenv/\nvenv/\nENV/\nbuild/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.egg"
},
{
"path": "backend/README.md",
"chars": 4306,
"preview": "# FinnewsHunter Backend\n\nBackend service for the financial news intelligent analysis system based on the AgenticX framew"
},
{
"path": "backend/README_zn.md",
"chars": 3259,
"preview": "# FinnewsHunter Backend\n\n基于 AgenticX 框架的金融新闻智能分析系统后端服务。\n\n## 文档导航\n\n### 快速开始\n- **[QUICKSTART.md](../QUICKSTART.md)** - 快速启"
},
{
"path": "backend/add_raw_html_column.py",
"chars": 1426,
"preview": "\"\"\"\n数据库迁移:添加 raw_html 字段\n\"\"\"\nimport os\nfrom pathlib import Path\nfrom dotenv import load_dotenv\n\n# 加载环境变量\nenv_path = Path"
},
{
"path": "backend/app/__init__.py",
"chars": 65,
"preview": "\"\"\"\nFinnewsHunter Backend Application\n\"\"\"\n__version__ = \"0.1.0\"\n\n"
},
{
"path": "backend/app/agents/__init__.py",
"chars": 868,
"preview": "\"\"\"\n智能体模块\n\"\"\"\nfrom .news_analyst import NewsAnalystAgent, create_news_analyst\nfrom .debate_agents import (\n BullResea"
},
{
"path": "backend/app/agents/data_collector.py",
"chars": 9756,
"preview": "\"\"\"\n数据专员智能体\n\n负责在辩论前搜集和整理相关数据资料,包括:\n- 新闻数据(从数据库或BochaAI搜索)\n- 财务数据(从AkShare获取)\n- 行情数据(实时行情、K线等)\n\"\"\"\nimport logging\nfrom ty"
},
{
"path": "backend/app/agents/data_collector_v2.py",
"chars": 26788,
"preview": "\"\"\"\n数据专员智能体 V2 (DataCollectorAgent)\n\n统一负责所有数据获取任务,支持:\n- 辩论前的初始数据收集\n- 辩论中的动态数据补充\n- 用户追问时的按需搜索\n\n核心特性:\n1. 计划/执行分离:先生成搜索计划,用"
},
{
"path": "backend/app/agents/debate_agents.py",
"chars": 17385,
"preview": "\"\"\"\n辩论智能体 - Phase 2\n实现 Bull vs Bear 多智能体辩论机制\n\n支持动态搜索:智能体可以在发言中请求额外数据\n格式: [SEARCH: \"查询内容\" source:数据源]\n\"\"\"\nimport logging\n"
},
{
"path": "backend/app/agents/news_analyst.py",
"chars": 13657,
"preview": "\"\"\"\n新闻分析师智能体\n\"\"\"\nimport logging\nfrom typing import List, Dict, Any, Optional\nfrom agenticx import Agent, Task, BaseTool\n"
},
{
"path": "backend/app/agents/orchestrator.py",
"chars": 30564,
"preview": "\"\"\"\n协作编排器\n\n负责管理多智能体协作流程,支持:\n- 并行分析模式(parallel)\n- 实时辩论模式(realtime_debate)\n- 快速分析模式(quick_analysis)\n- 动态搜索模式(在辩论过程中按需获取数据)"
},
{
"path": "backend/app/agents/quantitative_agent.py",
"chars": 15088,
"preview": "\"\"\"\n量化分析智能体\n\n负责量化因子挖掘、技术分析和量化策略生成。\n集成 Alpha Mining 模块,提供自动化因子发现能力。\n\n功能:\n- 因子挖掘:使用 RL 自动发现有效交易因子\n- 因子评估:评估因子的预测能力和回测表现\n- "
},
{
"path": "backend/app/agents/search_analyst.py",
"chars": 18667,
"preview": "\"\"\"\n搜索分析师智能体 (SearchAnalystAgent)\n\n负责在辩论过程中动态搜集数据,支持多种数据源:\n- AkShare: 财务指标、K线数据、资金流向、机构持仓\n- BochaAI: 实时新闻搜索、分析师报告\n- Inte"
},
{
"path": "backend/app/alpha_mining/README.md",
"chars": 3906,
"preview": "# M12: Alpha Mining 量化因子挖掘模块\n\n基于 AlphaGPT 技术的量化因子自动挖掘模块,使用符号回归 + 强化学习自动发现有预测能力的交易因子。\n\n## 功能特性\n\n- **因子自动发现**:使用 Transform"
},
{
"path": "backend/app/alpha_mining/__init__.py",
"chars": 1274,
"preview": "\"\"\"\nM12: Alpha Mining Module for FinnewsHunter\n\n基于 AlphaGPT 技术的量化因子自动挖掘模块。\n使用符号回归 + 强化学习自动发现有预测能力的交易因子。\n\n核心组件:\n- dsl: 因子"
},
{
"path": "backend/app/alpha_mining/backtest/__init__.py",
"chars": 123,
"preview": "\"\"\"\n因子回测评估模块\n\n提供因子有效性评估,包括 Sortino Ratio 等指标计算。\n\"\"\"\n\nfrom .evaluator import FactorEvaluator\n\n__all__ = [\"FactorEvaluator"
},
{
"path": "backend/app/alpha_mining/backtest/evaluator.py",
"chars": 8847,
"preview": "\"\"\"\n因子回测评估器\n\n评估因子的预测能力和交易表现。\n\n评估指标:\n- Sortino Ratio: 风险调整收益(只考虑下行风险)\n- Sharpe Ratio: 风险调整收益\n- IC: 信息系数(因子与收益的相关性)\n- Rank"
},
{
"path": "backend/app/alpha_mining/config.py",
"chars": 2431,
"preview": "\"\"\"\nAlpha Mining 配置模块\n\n定义训练、模型、回测等配置参数。\n\nReferences:\n- AlphaGPT upstream/model_core/config.py\n\"\"\"\n\nimport torch\nfrom dat"
},
{
"path": "backend/app/alpha_mining/dsl/__init__.py",
"chars": 297,
"preview": "\"\"\"\n因子表达式 DSL(Domain Specific Language)\n\n包含操作符定义和词汇表管理。\n\"\"\"\n\nfrom .ops import OPS_CONFIG, ts_delay, ts_delta, ts_mean, t"
},
{
"path": "backend/app/alpha_mining/dsl/ops.py",
"chars": 4877,
"preview": "\"\"\"\n因子操作符定义\n\n定义因子表达式中可用的操作符,包括:\n- 算术运算:ADD, SUB, MUL, DIV\n- 一元运算:NEG, ABS, SIGN\n- 时序运算:DELAY, DELTA, MA, STD\n- 条件运算:GATE"
},
{
"path": "backend/app/alpha_mining/dsl/vocab.py",
"chars": 4361,
"preview": "\"\"\"\n因子词汇表管理\n\n管理因子表达式中的 token 词汇表,包括:\n- 特征 token(RET, VOL, VOLUME_CHG 等)\n- 操作符 token(ADD, SUB, MUL 等)\n\n提供 token <-> name "
},
{
"path": "backend/app/alpha_mining/features/__init__.py",
"chars": 244,
"preview": "\"\"\"\n特征构建器模块\n\n- MarketFeatureBuilder: 从行情数据构建特征\n- SentimentFeatureBuilder: 从新闻情感分析结果构建特征\n\"\"\"\n\nfrom .market import MarketF"
},
{
"path": "backend/app/alpha_mining/features/market.py",
"chars": 8083,
"preview": "\"\"\"\n行情特征构建器\n\n从原始行情数据(OHLCV)构建因子挖掘所需的标准化特征。\n\n特征列表:\n- RET: 收益率\n- VOL: 波动率(滚动标准差)\n- VOLUME_CHG: 成交量变化率\n- TURNOVER: 换手率\n\"\"\"\n"
},
{
"path": "backend/app/alpha_mining/features/sentiment.py",
"chars": 8924,
"preview": "\"\"\"\n情感特征构建器\n\n从 FinnewsHunter 的新闻分析结果构建情感特征。\n\n特征列表:\n- SENTIMENT: 情感分数(-1 到 1)\n- NEWS_COUNT: 新闻数量(标准化)\n\n与 FinnewsHunter 现有"
},
{
"path": "backend/app/alpha_mining/model/__init__.py",
"chars": 229,
"preview": "\"\"\"\n因子生成模型和训练器\n\n- AlphaGenerator: Transformer 策略网络,生成因子表达式\n- AlphaTrainer: RL 训练器,使用 REINFORCE 算法优化\n\"\"\"\n\nfrom .alpha_gen"
},
{
"path": "backend/app/alpha_mining/model/alpha_generator.py",
"chars": 8227,
"preview": "\"\"\"\n因子生成模型\n\n基于 Transformer 的策略网络,用于生成因子表达式 token 序列。\n\n架构:\n- Token Embedding + Position Embedding\n- Transformer Encoder(使"
},
{
"path": "backend/app/alpha_mining/model/trainer.py",
"chars": 9240,
"preview": "\"\"\"\n因子挖掘 RL 训练器\n\n使用 REINFORCE 算法训练 AlphaGenerator,以回测收益为奖励信号。\n\n训练流程:\n1. 生成因子表达式\n2. 执行表达式得到因子值\n3. 回测评估因子有效性(计算奖励)\n4. 策略梯度"
},
{
"path": "backend/app/alpha_mining/tools/__init__.py",
"chars": 151,
"preview": "\"\"\"\nAgenticX 工具封装\n\n将因子挖掘能力封装为 AgenticX Tool,供 QuantitativeAgent 调用。\n\"\"\"\n\nfrom .alpha_mining_tool import AlphaMiningTool\n"
},
{
"path": "backend/app/alpha_mining/tools/alpha_mining_tool.py",
"chars": 14234,
"preview": "\"\"\"\nAlpha Mining AgenticX 工具封装\n\n将因子挖掘功能封装为 AgenticX BaseTool,供 Agent 调用。\n\n支持的操作:\n- mine: 挖掘新因子\n- evaluate: 评估现有因子\n- list"
},
{
"path": "backend/app/alpha_mining/utils.py",
"chars": 4353,
"preview": "\"\"\"\nAlpha Mining 工具函数\n\n提供模拟数据生成、数据预处理等工具函数。\n\"\"\"\n\nimport torch\nimport numpy as np\nfrom typing import Tuple, Optional\nimpo"
},
{
"path": "backend/app/alpha_mining/vm/__init__.py",
"chars": 103,
"preview": "\"\"\"\n因子执行器模块\n\n提供 FactorVM 栈式虚拟机,用于执行因子表达式。\n\"\"\"\n\nfrom .factor_vm import FactorVM\n\n__all__ = [\"FactorVM\"]\n"
},
{
"path": "backend/app/alpha_mining/vm/factor_vm.py",
"chars": 6805,
"preview": "\"\"\"\n因子表达式执行器(栈式虚拟机)\n\n使用栈式执行方式解析和执行因子表达式 token 序列。\n\n执行流程:\n1. 遍历 token 序列\n2. 如果是特征 token:将对应特征数据入栈\n3. 如果是操作符 token:弹出所需参数,"
},
{
"path": "backend/app/api/__init__.py",
"chars": 15,
"preview": "\"\"\"\nAPI模块\n\"\"\"\n\n"
},
{
"path": "backend/app/api/v1/__init__.py",
"chars": 1162,
"preview": "\"\"\"\nAPI v1 模块\n\"\"\"\nfrom fastapi import APIRouter\nfrom . import analysis, tasks, llm_config, stocks, agents, debug, knowle"
},
{
"path": "backend/app/api/v1/agents.py",
"chars": 53853,
"preview": "\"\"\"\n智能体 API 路由 - Phase 2\n提供辩论功能、执行日志、性能监控等接口\n\"\"\"\nimport logging\nimport json\nimport asyncio\nfrom datetime import datetime"
},
{
"path": "backend/app/api/v1/alpha_mining.py",
"chars": 24404,
"preview": "\"\"\"\nAlpha Mining REST API\n\n提供因子挖掘相关的 HTTP 接口。\n\nEndpoints:\n- POST /alpha-mining/mine - 启动因子挖掘任务\n- POST /alpha-mining/mine"
},
{
"path": "backend/app/api/v1/analysis.py",
"chars": 9380,
"preview": "\"\"\"\n分析任务 API 路由\n\"\"\"\nimport logging\nimport asyncio\nimport json\nfrom typing import List, Optional\nfrom fastapi import APIR"
},
{
"path": "backend/app/api/v1/debug.py",
"chars": 5933,
"preview": "\"\"\"\n调试 API - 用于测试爬虫和内容提取\n\"\"\"\nimport re\nimport logging\nfrom typing import Optional\nfrom fastapi import APIRouter, HTTPExc"
},
{
"path": "backend/app/api/v1/knowledge_graph.py",
"chars": 11069,
"preview": "\"\"\"\n知识图谱管理 API\n提供图谱的查询、构建、更新、删除接口\n\"\"\"\nimport logging\nfrom typing import List, Dict, Any, Optional\nfrom fastapi import AP"
},
{
"path": "backend/app/api/v1/llm_config.py",
"chars": 3958,
"preview": "\"\"\"\nLLM 配置 API 路由\n返回可用的 LLM 厂商和模型列表\n\"\"\"\nimport logging\nfrom typing import List, Dict, Optional\nfrom fastapi import APIRo"
},
{
"path": "backend/app/api/v1/news.py",
"chars": 10830,
"preview": "\"\"\"\n新闻管理 API 路由\n\"\"\"\nimport logging\nfrom typing import List, Optional\nfrom datetime import datetime, timedelta\nfrom fasta"
},
{
"path": "backend/app/api/v1/news_v2.py",
"chars": 5385,
"preview": "\"\"\"\n新闻 API v2 - 使用新的 Financial Data Layer\n\n新功能:\n1. 多数据源支持:可指定 provider (sina, tencent, nbd...)\n2. 自动降级:一个源失败自动切换另一个\n3. 标"
},
{
"path": "backend/app/api/v1/stocks.py",
"chars": 29017,
"preview": "\"\"\"\n股票分析 API 路由 - Phase 2\n提供个股分析、关联新闻、情感趋势等接口\n支持 akshare 真实股票数据\n\"\"\"\nimport logging\nfrom datetime import datetime, timede"
},
{
"path": "backend/app/api/v1/tasks.py",
"chars": 8076,
"preview": "\"\"\"\n任务管理 API 路由\n\"\"\"\nimport logging\nfrom typing import List, Optional\nfrom fastapi import APIRouter, Depends, HTTPExcepti"
},
{
"path": "backend/app/config/__init__.py",
"chars": 4164,
"preview": "\"\"\"\n配置模块\n\"\"\"\nimport os\nfrom pathlib import Path\nfrom typing import Dict, Any, Optional, List\nimport yaml\nfrom pydantic i"
},
{
"path": "backend/app/config/debate_modes.yaml",
"chars": 3614,
"preview": "# 多智能体协作模式配置\n# 支持多种辩论/分析模式,可通过前端或API选择\n\n# 默认模式\ndefault_mode: parallel\n\nmodes:\n # ============ 并行分析模式(当前默认) ============"
},
{
"path": "backend/app/core/__init__.py",
"chars": 168,
"preview": "\"\"\"\n核心模块\n\"\"\"\nfrom .config import settings, get_settings\nfrom .database import get_db, init_database\n\n__all__ = [\"setting"
},
{
"path": "backend/app/core/celery_app.py",
"chars": 3290,
"preview": "\"\"\"\nCelery 应用配置\n\"\"\"\nfrom celery import Celery\nfrom celery.schedules import crontab\nfrom .config import settings\n\n# 创建 Ce"
},
{
"path": "backend/app/core/config.py",
"chars": 7947,
"preview": "\"\"\"\nFinnewsHunter 核心配置模块\n使用 Pydantic Settings 管理环境变量和配置\n\"\"\"\nfrom typing import Optional, List\nfrom pydantic import Field"
},
{
"path": "backend/app/core/database.py",
"chars": 1153,
"preview": "\"\"\"\n数据库连接和依赖注入\n\"\"\"\nfrom typing import AsyncGenerator\nfrom sqlalchemy.ext.asyncio import AsyncSession\n\nfrom ..models.data"
},
{
"path": "backend/app/core/neo4j_client.py",
"chars": 3685,
"preview": "\"\"\"\nNeo4j 图数据库客户端\n用于存储和查询公司知识图谱\n\"\"\"\nimport logging\nfrom typing import Optional, Dict, List, Any\nfrom neo4j import GraphD"
},
{
"path": "backend/app/core/redis_client.py",
"chars": 4748,
"preview": "\"\"\"\nRedis Client for Caching and Task Queue\n\"\"\"\nimport json\nimport logging\nfrom typing import Optional, Any\nfrom datetim"
},
{
"path": "backend/app/financial/__init__.py",
"chars": 806,
"preview": "\"\"\"\nFinnewsHunter 金融数据层\n\n借鉴 OpenBB 的 Provider-Fetcher 架构,提供:\n1. Standard Models: 统一的数据模型 (NewsData, StockPriceData 等)\n2."
},
{
"path": "backend/app/financial/models/__init__.py",
"chars": 430,
"preview": "\"\"\"\n金融数据标准模型\n\n借鉴 OpenBB Standard Models 设计:\n- QueryParams: 定义标准输入参数\n- Data: 定义标准输出字段\n\n所有 Provider 的 Fetcher 都使用这些标准模型,确保"
},
{
"path": "backend/app/financial/models/news.py",
"chars": 4783,
"preview": "\"\"\"\n金融新闻标准模型\n\n借鉴 OpenBB Standard Models 设计:\n- NewsQueryParams: 新闻查询参数标准模型\n- NewsData: 新闻数据标准模型\n\n所有 NewsProvider 的 Fetche"
},
{
"path": "backend/app/financial/models/stock.py",
"chars": 4223,
"preview": "\"\"\"\n股票数据标准模型\n\n借鉴 OpenBB Standard Models 设计:\n- StockQueryParams: 股票数据查询参数\n- StockPriceData: 股票价格数据 (K线)\n\n来源参考:\n- OpenBB: "
},
{
"path": "backend/app/financial/providers/__init__.py",
"chars": 445,
"preview": "\"\"\"\n数据源 Provider 模块\n\n每个 Provider 代表一个数据源(如 Sina, Tencent, AkShare),\n每个 Provider 下可以有多个 Fetcher,每个 Fetcher 对应一种数据类型。\n\n架构:"
},
{
"path": "backend/app/financial/providers/base.py",
"chars": 5993,
"preview": "\"\"\"\nProvider & Fetcher 基础抽象\n\n借鉴 OpenBB 的 TET (Transform-Extract-Transform) Pipeline:\n1. Transform Query: 将标准参数转换为 Provid"
},
{
"path": "backend/app/financial/providers/eastmoney/__init__.py",
"chars": 167,
"preview": "\"\"\"\n东方财富 Provider\n\"\"\"\nfrom .provider import EastmoneyProvider\nfrom .fetchers.news import EastmoneyNewsFetcher\n\n__all__ ="
},
{
"path": "backend/app/financial/providers/eastmoney/fetchers/__init__.py",
"chars": 97,
"preview": "\"\"\"\n东方财富 Fetchers\n\"\"\"\nfrom .news import EastmoneyNewsFetcher\n\n__all__ = [\"EastmoneyNewsFetcher\"]\n"
},
{
"path": "backend/app/financial/providers/eastmoney/fetchers/news.py",
"chars": 9020,
"preview": "\"\"\"\n东方财富新闻 Fetcher\n\n基于 TET Pipeline 实现\n\"\"\"\nimport re\nimport logging\nfrom typing import List, Dict, Any, Optional\nfrom da"
},
{
"path": "backend/app/financial/providers/eastmoney/provider.py",
"chars": 729,
"preview": "\"\"\"\n东方财富 Provider\n\"\"\"\nfrom typing import Dict, Type\n\nfrom ..base import BaseProvider, BaseFetcher, ProviderInfo\nfrom .fe"
},
{
"path": "backend/app/financial/providers/nbd/__init__.py",
"chars": 145,
"preview": "\"\"\"\n每日经济新闻 Provider\n\"\"\"\nfrom .provider import NbdProvider\nfrom .fetchers.news import NbdNewsFetcher\n\n__all__ = [\"NbdProv"
},
{
"path": "backend/app/financial/providers/nbd/fetchers/__init__.py",
"chars": 87,
"preview": "\"\"\"\n每日经济新闻 Fetchers\n\"\"\"\nfrom .news import NbdNewsFetcher\n\n__all__ = [\"NbdNewsFetcher\"]\n"
},
{
"path": "backend/app/financial/providers/nbd/fetchers/news.py",
"chars": 8986,
"preview": "\"\"\"\n每日经济新闻 Fetcher\n\n基于 TET Pipeline 实现\n\"\"\"\nimport re\nimport logging\nfrom typing import List, Dict, Any, Optional\nfrom da"
},
{
"path": "backend/app/financial/providers/nbd/provider.py",
"chars": 701,
"preview": "\"\"\"\n每日经济新闻 Provider\n\"\"\"\nfrom typing import Dict, Type\n\nfrom ..base import BaseProvider, BaseFetcher, ProviderInfo\nfrom ."
},
{
"path": "backend/app/financial/providers/netease/__init__.py",
"chars": 159,
"preview": "\"\"\"\n网易财经 Provider\n\"\"\"\nfrom .provider import NeteaseProvider\nfrom .fetchers.news import NeteaseNewsFetcher\n\n__all__ = [\"N"
},
{
"path": "backend/app/financial/providers/netease/fetchers/__init__.py",
"chars": 93,
"preview": "\"\"\"\n网易财经 Fetchers\n\"\"\"\nfrom .news import NeteaseNewsFetcher\n\n__all__ = [\"NeteaseNewsFetcher\"]\n"
},
{
"path": "backend/app/financial/providers/netease/fetchers/news.py",
"chars": 8800,
"preview": "\"\"\"\n网易财经新闻 Fetcher\n\n基于 TET Pipeline 实现\n\"\"\"\nimport re\nimport logging\nfrom typing import List, Dict, Any, Optional\nfrom da"
},
{
"path": "backend/app/financial/providers/netease/provider.py",
"chars": 711,
"preview": "\"\"\"\n网易财经 Provider\n\"\"\"\nfrom typing import Dict, Type\n\nfrom ..base import BaseProvider, BaseFetcher, ProviderInfo\nfrom .fe"
},
{
"path": "backend/app/financial/providers/sina/__init__.py",
"chars": 179,
"preview": "\"\"\"\n新浪财经 Provider\n\n提供:\n- 新闻数据 (news): SinaNewsFetcher\n\n从 tools/sina_crawler.py 迁移而来,保留核心逻辑,\n适配 TET Pipeline 架构。\n\"\"\"\nfrom"
},
{
"path": "backend/app/financial/providers/sina/fetchers/__init__.py",
"chars": 87,
"preview": "\"\"\"\n新浪财经 Fetchers\n\"\"\"\nfrom .news import SinaNewsFetcher\n\n__all__ = [\"SinaNewsFetcher\"]\n"
},
{
"path": "backend/app/financial/providers/sina/fetchers/news.py",
"chars": 13017,
"preview": "\"\"\"\n新浪财经新闻 Fetcher\n\n从 tools/sina_crawler.py 迁移而来,适配 TET Pipeline 架构。\n\n主要变更:\n- transform_query: 将 NewsQueryParams 转换为爬虫参数"
},
{
"path": "backend/app/financial/providers/sina/provider.py",
"chars": 746,
"preview": "\"\"\"\n新浪财经 Provider\n\"\"\"\nfrom typing import Dict, Type\n\nfrom ..base import BaseProvider, BaseFetcher, ProviderInfo\nfrom .fe"
},
{
"path": "backend/app/financial/providers/tencent/__init__.py",
"chars": 159,
"preview": "\"\"\"\n腾讯财经 Provider\n\"\"\"\nfrom .provider import TencentProvider\nfrom .fetchers.news import TencentNewsFetcher\n\n__all__ = [\"T"
},
{
"path": "backend/app/financial/providers/tencent/fetchers/__init__.py",
"chars": 93,
"preview": "\"\"\"\n腾讯财经 Fetchers\n\"\"\"\nfrom .news import TencentNewsFetcher\n\n__all__ = [\"TencentNewsFetcher\"]\n"
},
{
"path": "backend/app/financial/providers/tencent/fetchers/news.py",
"chars": 9675,
"preview": "\"\"\"\n腾讯财经新闻 Fetcher\n\n基于 TET Pipeline 实现:\n- Transform Query: 转换标准参数为腾讯财经特定参数\n- Extract Data: 从腾讯财经抓取原始数据\n- Transform Data:"
},
{
"path": "backend/app/financial/providers/tencent/provider.py",
"chars": 720,
"preview": "\"\"\"\n腾讯财经 Provider\n\"\"\"\nfrom typing import Dict, Type\n\nfrom ..base import BaseProvider, BaseFetcher, ProviderInfo\nfrom .fe"
},
{
"path": "backend/app/financial/providers/yicai/__init__.py",
"chars": 151,
"preview": "\"\"\"\n第一财经 Provider\n\"\"\"\nfrom .provider import YicaiProvider\nfrom .fetchers.news import YicaiNewsFetcher\n\n__all__ = [\"Yicai"
},
{
"path": "backend/app/financial/providers/yicai/fetchers/__init__.py",
"chars": 89,
"preview": "\"\"\"\n第一财经 Fetchers\n\"\"\"\nfrom .news import YicaiNewsFetcher\n\n__all__ = [\"YicaiNewsFetcher\"]\n"
},
{
"path": "backend/app/financial/providers/yicai/fetchers/news.py",
"chars": 8540,
"preview": "\"\"\"\n第一财经新闻 Fetcher\n\n基于 TET Pipeline 实现\n\"\"\"\nimport re\nimport logging\nfrom typing import List, Dict, Any, Optional\nfrom da"
},
{
"path": "backend/app/financial/providers/yicai/provider.py",
"chars": 703,
"preview": "\"\"\"\n第一财经 Provider\n\"\"\"\nfrom typing import Dict, Type\n\nfrom ..base import BaseProvider, BaseFetcher, ProviderInfo\nfrom .fe"
},
{
"path": "backend/app/financial/registry.py",
"chars": 6463,
"preview": "\"\"\"\nProvider 注册中心\n\n支持:\n1. 动态注册/注销 Provider\n2. 根据数据类型获取 Fetcher\n3. 多 Provider 自动降级\n\n来源参考:\n- OpenBB: Provider Registry 机制\n"
},
{
"path": "backend/app/financial/tools.py",
"chars": 8196,
"preview": "\"\"\"\n金融数据工具 - 封装为 AgenticX BaseTool\n\n这些工具可以直接被 Agent 调用,内部使用 Provider Registry 获取数据。\n\n设计原则:\n- 继承 AgenticX BaseTool,保持与框架兼"
},
{
"path": "backend/app/knowledge/README.md",
"chars": 2620,
"preview": "# 知识图谱模块\n\n## 📊 概述\n\n知识图谱模块为每只股票构建动态的知识图谱,用于智能化的新闻检索和分析。\n\n## 🎯 核心功能\n\n### 1. 多维度知识建模\n\n为每家公司建立包含以下信息的知识图谱:\n\n- **名称变体**:公司简称、"
},
{
"path": "backend/app/knowledge/__init__.py",
"chars": 572,
"preview": "\"\"\"\n知识图谱模块\n\"\"\"\nfrom .graph_models import (\n CompanyNode,\n NameVariantNode,\n BusinessNode,\n IndustryNode,\n "
},
{
"path": "backend/app/knowledge/graph_models.py",
"chars": 6592,
"preview": "\"\"\"\n知识图谱数据模型\n定义公司知识图谱的节点和关系结构\n\"\"\"\nfrom typing import List, Dict, Any, Optional\nfrom pydantic import BaseModel, Field\nfro"
},
{
"path": "backend/app/knowledge/graph_service.py",
"chars": 18656,
"preview": "\"\"\"\n知识图谱服务\n提供公司知识图谱的创建、查询、更新操作\n\"\"\"\nimport logging\nfrom typing import List, Dict, Any, Optional\nfrom datetime import date"
},
{
"path": "backend/app/knowledge/knowledge_extractor.py",
"chars": 21623,
"preview": "\"\"\"\n知识提取器\n从多种数据源提取公司知识并构建图谱\n\"\"\"\nimport logging\nimport json\nfrom typing import List, Dict, Any, Optional\nfrom datetime im"
},
{
"path": "backend/app/knowledge/parallel_search.py",
"chars": 4233,
"preview": "\"\"\"\n并发多关键词检索策略\n基于知识图谱的关键词,并发调用多个搜索API\n\"\"\"\nimport logging\nimport asyncio\nfrom typing import List, Dict, Any, Set\nfrom con"
},
{
"path": "backend/app/main.py",
"chars": 8272,
"preview": "\"\"\"\nFinnewsHunter 主应用入口\n\"\"\"\nimport logging\nfrom contextlib import asynccontextmanager\nfrom fastapi import FastAPI, Reque"
},
{
"path": "backend/app/models/__init__.py",
"chars": 472,
"preview": "\"\"\"\n数据模型模块\n\"\"\"\nfrom .database import Base, get_async_session, get_sync_session, init_db\nfrom .news import News\nfrom .sto"
},
{
"path": "backend/app/models/analysis.py",
"chars": 2378,
"preview": "\"\"\"\n分析结果数据模型\n\"\"\"\nfrom datetime import datetime\nfrom sqlalchemy import Column, Integer, String, Text, DateTime, Float, Fo"
},
{
"path": "backend/app/models/crawl_task.py",
"chars": 3102,
"preview": "\"\"\"\n爬取任务数据模型\n\"\"\"\nfrom datetime import datetime\nfrom typing import Optional\nfrom sqlalchemy import Column, Integer, Strin"
},
{
"path": "backend/app/models/database.py",
"chars": 1966,
"preview": "\"\"\"\n数据库连接和会话管理\n\"\"\"\nfrom typing import AsyncGenerator\nfrom sqlalchemy import create_engine\nfrom sqlalchemy.ext.asyncio im"
},
{
"path": "backend/app/models/debate_history.py",
"chars": 1809,
"preview": "\"\"\"\n辩论历史数据模型\n\"\"\"\nfrom datetime import datetime\nfrom typing import List, Optional\nfrom sqlalchemy import Column, Integer,"
},
{
"path": "backend/app/models/news.py",
"chars": 2832,
"preview": "\"\"\"\n新闻数据模型 - Phase 2 索引优化\n\"\"\"\nfrom datetime import datetime\nfrom typing import List, Optional\nfrom sqlalchemy import Col"
},
{
"path": "backend/app/models/stock.py",
"chars": 1901,
"preview": "\"\"\"\n股票数据模型\n\"\"\"\nfrom datetime import datetime\nfrom sqlalchemy import Column, Integer, String, DateTime, Float\n\nfrom .data"
},
{
"path": "backend/app/scripts/init_stocks.py",
"chars": 11337,
"preview": "\"\"\"\n初始化股票数据脚本\n从 akshare 获取全部 A 股信息并存入 PostgreSQL\n\n使用方法:\n cd backend\n python -m app.scripts.init_stocks\n\"\"\"\nimport "
},
{
"path": "backend/app/services/__init__.py",
"chars": 408,
"preview": "\"\"\"\n服务模块\n\"\"\"\nfrom .llm_service import get_llm_provider, get_llm_service, LLMService\nfrom .embedding_service import get_e"
},
{
"path": "backend/app/services/analysis_service.py",
"chars": 9200,
"preview": "\"\"\"\n新闻分析服务\n协调智能体执行分析任务\n\"\"\"\nimport logging\nimport time\nfrom typing import Dict, Any, Optional\nfrom sqlalchemy.ext.asyncio"
},
{
"path": "backend/app/services/embedding_service.py",
"chars": 9951,
"preview": "\"\"\"\nEmbedding 服务封装\n使用 agenticx.embeddings.BailianEmbeddingProvider\n\"\"\"\nimport logging\nimport asyncio\nfrom typing import "
},
{
"path": "backend/app/services/llm_service.py",
"chars": 13860,
"preview": "\"\"\"\nLLM 服务封装\n\"\"\"\nimport logging\nfrom typing import Optional, Dict, Any, Union\nfrom agenticx import LiteLLMProvider, LLMR"
},
{
"path": "backend/app/services/stock_data_service.py",
"chars": 30365,
"preview": "\"\"\"\n股票数据服务 - 使用 akshare 获取真实股票数据\n\"\"\"\nimport logging\nfrom datetime import datetime, timedelta\nfrom typing import List, Op"
},
{
"path": "backend/app/storage/__init__.py",
"chars": 85,
"preview": "\"\"\"\n存储模块\n\"\"\"\nfrom .vector_storage import VectorStorage\n\n__all__ = [\"VectorStorage\"]\n\n"
},
{
"path": "backend/app/storage/vector_storage.py",
"chars": 7423,
"preview": "\"\"\"\n向量存储封装 - 直接使用 agenticx.storage.vectordb_storages.milvus.MilvusStorage\n提供简单的兼容性接口,充分利用 base 类的便利方法\n\"\"\"\nimport logging"
},
{
"path": "backend/app/tasks/__init__.py",
"chars": 160,
"preview": "\"\"\"\nCelery 任务模块\n\"\"\"\nfrom .crawl_tasks import realtime_crawl_task, cold_start_crawl_task\n\n__all__ = [\n \"realtime_crawl"
},
{
"path": "backend/app/tasks/crawl_tasks.py",
"chars": 39260,
"preview": "\"\"\"\nCelery 爬取任务 - Phase 2: 实时监控升级版 + 多源支持\n\"\"\"\nimport logging\nimport json\nfrom datetime import datetime, timedelta\nfrom t"
},
{
"path": "backend/app/tools/__init__.py",
"chars": 1132,
"preview": "\"\"\"\n工具模块\n\"\"\"\nfrom .crawler_base import BaseCrawler, NewsItem\nfrom .sina_crawler import SinaCrawlerTool, create_sina_craw"
},
{
"path": "backend/app/tools/bochaai_search.py",
"chars": 9363,
"preview": "\"\"\"\nBochaAI Web Search Tool\n用于定向搜索股票相关新闻\n\"\"\"\nimport json\nimport logging\nimport urllib.request\nimport urllib.error\nfrom t"
},
{
"path": "backend/app/tools/caijing_crawler.py",
"chars": 8476,
"preview": "\"\"\"\n财经网爬虫工具\n目标URL: https://www.caijing.com.cn/ (股市栏目)\n\"\"\"\nimport re\nimport logging\nfrom typing import List, Optional\nfro"
},
{
"path": "backend/app/tools/crawler_base.py",
"chars": 13318,
"preview": "\"\"\"\n爬虫基类\n符合 AgenticX BaseTool 协议\n\"\"\"\nimport time\nimport logging\nfrom typing import List, Dict, Any, Optional\nfrom datacl"
},
{
"path": "backend/app/tools/crawler_enhanced.py",
"chars": 25711,
"preview": "\"\"\"\n增强版爬虫模块\n整合 deer-flow、BasicWebCrawler 和现有爬虫的优点\n\n特性:\n1. 多引擎支持:本地爬取 + Jina Reader API + Playwright JS 渲染\n2. 智能内容提取:read"
},
{
"path": "backend/app/tools/dynamic_crawler_example.py",
"chars": 6927,
"preview": "\"\"\"\n动态网站爬虫示例 - 使用 Selenium\n适用于需要点击\"加载更多\"的网站\n\n依赖安装:\npip install selenium webdriver-manager\n\"\"\"\nimport logging\nfrom typing"
},
{
"path": "backend/app/tools/eastmoney_crawler.py",
"chars": 8989,
"preview": "\"\"\"\n东方财富爬虫工具\n目标URL: https://stock.eastmoney.com/\n\"\"\"\nimport re\nimport logging\nfrom typing import List, Optional\nfrom dat"
},
{
"path": "backend/app/tools/eeo_crawler.py",
"chars": 16415,
"preview": "\"\"\"\n经济观察网爬虫工具\n目标URL: https://www.eeo.com.cn/jg/jinrong/zhengquan/\n\"\"\"\nimport re\nimport json\nimport logging\nfrom typing i"
},
{
"path": "backend/app/tools/interactive_crawler.py",
"chars": 32332,
"preview": "\"\"\"\n交互式网页爬虫\n使用 requests + BeautifulSoup 进行网页爬取\n特别用于搜索结果补充,当 BochaAI 结果不足时使用\n\n注意:主要搜索引擎(Bing、百度)都有反爬机制,本模块已做相应优化:\n1. 模拟真实"
},
{
"path": "backend/app/tools/jingji21_crawler.py",
"chars": 7203,
"preview": "\"\"\"\n21经济网爬虫工具\n目标URL: https://www.21jingji.com/ (证券栏目)\n\"\"\"\nimport re\nimport logging\nfrom typing import List, Optional\nfro"
},
{
"path": "backend/app/tools/jwview_crawler.py",
"chars": 7255,
"preview": "\"\"\"\n中新经纬爬虫工具\n目标URL: https://www.jwview.com/\n\"\"\"\nimport re\nimport logging\nfrom typing import List, Optional\nfrom datetime"
},
{
"path": "backend/app/tools/nbd_crawler.py",
"chars": 9957,
"preview": "\"\"\"\n每日经济新闻爬虫工具\n目标URL: https://finance.nbd.com.cn/\n\"\"\"\nimport re\nimport logging\nfrom typing import List, Optional\nfrom da"
},
{
"path": "backend/app/tools/netease163_crawler.py",
"chars": 6712,
"preview": "\"\"\"\n网易财经爬虫工具\n目标URL: https://money.163.com/\n\"\"\"\nimport re\nimport logging\nfrom typing import List, Optional\nfrom datetime "
},
{
"path": "backend/app/tools/search_engine_crawler.py",
"chars": 8774,
"preview": "\"\"\"\n搜索引擎爬虫工具\n直接爬取搜索引擎结果页面(Bing/Baidu)\n\"\"\"\nimport logging\nimport re\nimport requests\nfrom typing import List, Dict, Any, O"
},
{
"path": "backend/app/tools/sina_crawler.py",
"chars": 10738,
"preview": "\"\"\"\n新浪财经爬虫工具\n重构自 legacy_v1/Crawler/crawler_sina.py\n\"\"\"\nimport re\nimport logging\nfrom typing import List, Optional\nfrom d"
},
{
"path": "backend/app/tools/tencent_crawler.py",
"chars": 17425,
"preview": "\"\"\"\n腾讯财经爬虫工具\n目标URL: https://news.qq.com/ch/finance/\n\"\"\"\nimport re\nimport logging\nfrom typing import List, Optional\nfrom "
},
{
"path": "backend/app/tools/text_cleaner.py",
"chars": 5934,
"preview": "\"\"\"\n文本清洗工具\n重构自 legacy_v1/src/Killua/\n\"\"\"\nimport re\nimport logging\nfrom typing import List, Set\nimport jieba\n\nfrom agenti"
},
{
"path": "backend/app/tools/yicai_crawler.py",
"chars": 6537,
"preview": "\"\"\"\n第一财经爬虫工具\n目标URL: https://www.yicai.com/news/gushi/\n\"\"\"\nimport re\nimport logging\nfrom typing import List, Optional\nfro"
},
{
"path": "backend/clear_news_data.py",
"chars": 2725,
"preview": "\"\"\"\n清除所有新闻相关数据\n\"\"\"\nimport os\nimport sys\nfrom pathlib import Path\n\n# 加载环境变量\nfrom dotenv import load_dotenv\nenv_path = Pat"
},
{
"path": "backend/env.example",
"chars": 4095,
"preview": "# FinnewsHunter 环境变量配置模板\n# 复制此文件为 .env 并填入实际值\n\n# ===== 应用配置 =====\nAPP_NAME=FinnewsHunter\nAPP_VERSION=0.1.0\nDEBUG=True\n\n#"
},
{
"path": "backend/init_db.py",
"chars": 1842,
"preview": "#!/usr/bin/env python\n\"\"\"\n数据库初始化脚本\n独立运行以创建数据库表\n\"\"\"\nimport sys\nimport os\n\n# 添加当前目录到 Python 路径\nsys.path.insert(0, os.path."
},
{
"path": "backend/init_knowledge_graph.py",
"chars": 3513,
"preview": "#!/usr/bin/env python\n\"\"\"\n初始化知识图谱\n创建 Neo4j 约束、索引,并为示例股票构建图谱\n\"\"\"\nimport asyncio\nimport logging\nimport sys\n\n# 配置日志\nlogging"
},
{
"path": "backend/requirements.txt",
"chars": 985,
"preview": "# ===== Web 框架 =====\nfastapi>=0.100.0\nuvicorn[standard]>=0.22.0\npydantic>=2.0.0\npydantic-settings>=2.0.0\npython-dotenv>="
},
{
"path": "backend/reset_database.py",
"chars": 3049,
"preview": "\"\"\"\n清空数据库并重新开始\n用于重置系统数据\n\"\"\"\nimport asyncio\nimport sys\nfrom sqlalchemy import text\nfrom app.core.database import get_asyn"
},
{
"path": "backend/setup_env.sh",
"chars": 2728,
"preview": "#!/bin/bash\n# 环境变量快速配置脚本\n\necho \"============================================\"\necho \" FinnewsHunter 环境配置向导\"\necho \"======"
},
{
"path": "backend/start.sh",
"chars": 961,
"preview": "#!/bin/bash\n# FinnewsHunter 启动脚本\n\nset -e\n\necho \"===================================\"\necho \" FinnewsHunter Backend Start"
},
{
"path": "backend/start_celery.sh",
"chars": 6098,
"preview": "#!/bin/bash\n# Celery 容器化重启脚本\n# 用法: ./start_celery.sh [--restart|-r] [--force-recreate|-f] [--rebuild|-b] [--logs|-l]\n\nse"
},
{
"path": "backend/tests/__init__.py",
"chars": 26,
"preview": "\"\"\"FinnewsHunter Tests\"\"\"\n"
},
{
"path": "backend/tests/check_milvus_data.py",
"chars": 3385,
"preview": "#!/usr/bin/env python3\n\"\"\"\n检查 Milvus 向量存储中的数据\n\"\"\"\nimport sys\nimport os\nimport asyncio\n\n# 添加项目路径\nsys.path.insert(0, os.pa"
},
{
"path": "backend/tests/check_news_embedding_status.py",
"chars": 2791,
"preview": "#!/usr/bin/env python3\n\"\"\"\n检查新闻的向量化状态\n\"\"\"\nimport sys\nimport os\nimport asyncio\n\n# 添加项目路径\nsys.path.insert(0, os.path.dirna"
},
{
"path": "backend/tests/financial/__init__.py",
"chars": 29,
"preview": "\"\"\"Financial module tests\"\"\"\n"
},
{
"path": "backend/tests/financial/test_smoke_openbb_models.py",
"chars": 6456,
"preview": "\"\"\"\n冒烟测试: Standard Models (P0-1, P0-2)\n\n验证:\n- NewsQueryParams, NewsData 模型可正常实例化\n- StockQueryParams, StockPriceData 模型可正"
},
{
"path": "backend/tests/financial/test_smoke_openbb_provider.py",
"chars": 10719,
"preview": "\"\"\"\n冒烟测试: Provider & Registry (P0-3, P0-4)\n\n验证:\n- BaseFetcher 抽象类可被正确继承\n- BaseProvider 抽象类可被正确继承\n- ProviderRegistry 注册/获"
},
{
"path": "backend/tests/financial/test_smoke_openbb_tools.py",
"chars": 5501,
"preview": "\"\"\"\n冒烟测试: Financial Tools (P1-2)\n\n验证:\n- FinancialNewsTool 可正常实例化\n- Tool 在无 Provider 时返回错误而非崩溃\n- Tool 正确调用 Registry\n\n运行:\n"
},
{
"path": "backend/tests/manual_vectorize.py",
"chars": 7238,
"preview": "#!/usr/bin/env python3\n\"\"\"\n手动向量化新闻(用于修复未向量化的新闻)\n\"\"\"\nimport sys\nimport os\nimport asyncio\nimport logging\n\n# 添加项目路径\nsys.pat"
},
{
"path": "backend/tests/test_alpha_mining/__init__.py",
"chars": 24,
"preview": "\"\"\"Alpha Mining 测试模块\"\"\"\n"
},
{
"path": "backend/tests/test_alpha_mining/test_integration_p2.py",
"chars": 14008,
"preview": "\"\"\"\nP2 集成测试 - Alpha Mining 完整集成\n\n测试覆盖:\n- F18: QuantitativeAgent 集成\n- F19: REST API 端点\n- 完整工作流测试\n\"\"\"\n\nimport pytest\nimpor"
},
{
"path": "backend/tests/test_alpha_mining/test_smoke_p0.py",
"chars": 14407,
"preview": "\"\"\"\nP0 冒烟测试 - Alpha Mining 核心机制\n\n测试覆盖:\n- F02: 配置模块\n- F03-F04: 操作符和时序函数\n- F05: 词汇表\n- F06-F07: FactorVM 执行和解码\n- F08-F09: A"
},
{
"path": "backend/tests/test_alpha_mining/test_smoke_p1.py",
"chars": 12723,
"preview": "\"\"\"\nP1 冒烟测试 - Alpha Mining 数据集成\n\n测试覆盖:\n- F13: MarketFeatureBuilder\n- F14: SentimentFeatureBuilder\n- F15: FactorEvaluator"
},
{
"path": "backend/tests/test_smoke_alpha_mining.py",
"chars": 11112,
"preview": "\"\"\"\nAlpha Mining 模块冒烟测试\n\n测试覆盖:\n1. DSL 操作符执行\n2. 因子虚拟机(FactorVM)\n3. 因子生成模型(AlphaGenerator)\n4. RL 训练器(AlphaTrainer)\n5. 因子评估"
},
{
"path": "deploy/Dockerfile.celery",
"chars": 552,
"preview": "FROM python:3.11\n\nWORKDIR /app\n\n# 复制requirements文件和entrypoint脚本\nCOPY backend/requirements.txt /app/requirements.txt\nCOPY"
},
{
"path": "deploy/celery-entrypoint.sh",
"chars": 561,
"preview": "#!/bin/bash\nset -e\n\n# 开发环境:检查依赖是否已安装(通过检查关键包)\n# 注意:由于 volumes 挂载会覆盖 /app,构建时安装的依赖可能不可见\n# 这个脚本确保在开发环境中依赖总是可用的\nCHECK_PACKA"
},
{
"path": "deploy/docker-compose.dev.yml",
"chars": 5775,
"preview": "version: '3.8'\n\nservices:\n postgres:\n image: postgres:15-alpine\n container_name: finnews_postgres\n environment"
},
{
"path": "docs/BochaAI_Web_Search_API_20251222_121535.md",
"chars": 1174,
"preview": "# BochaAI_Web_Search_API\n\n> 来源: https://bocha-ai.feishu.cn/wiki/RXEOw02rFiwzGSkd9mUcqoeAnNK\n> 爬取时间: 2025-12-22 12:15:35\n"
},
{
"path": "docs/天眼查MCP服务_20260104_171528.md",
"chars": 4839,
"preview": "# 天眼查MCP服务\n\n> 来源: https://bigmodel.cn/marketplace/detail/1846da9039e4\n> 爬取时间: 2026-01-04 17:15:28\n> 方式: 浏览器提取\n\n---\n\n控制台\n"
},
{
"path": "frontend/.gitignore",
"chars": 254,
"preview": "# Logs\nlogs\n*.log\nnpm-debug.log*\nyarn-debug.log*\nyarn-error.log*\npnpm-debug.log*\nlerna-debug.log*\n\nnode_modules\ndist\ndis"
},
{
"path": "frontend/QUICKSTART.md",
"chars": 833,
"preview": "# FinnewsHunter Frontend 快速启动\n\n## 🚀 5分钟启动\n\n### 1. 安装依赖\n\n```bash\nnpm install\n```\n\n### 2. 配置环境变量\n\n```bash\ncp .env.example "
},
{
"path": "frontend/README.md",
"chars": 2187,
"preview": "# FinnewsHunter Frontend (React + TypeScript)\n\n现代化的金融新闻智能分析平台前端,基于 **React 18 + TypeScript + Vite + Tailwind CSS + Shadc"
},
{
"path": "frontend/index.html",
"chars": 379,
"preview": "<!doctype html>\n<html lang=\"zh-CN\">\n <head>\n <meta charset=\"UTF-8\" />\n <link rel=\"icon\" type=\"image/svg+xml\" href"
},
{
"path": "frontend/package.json",
"chars": 1940,
"preview": "{\n \"name\": \"finnews-hunter-frontend\",\n \"private\": true,\n \"version\": \"0.1.0\",\n \"type\": \"module\",\n \"scripts\": {\n \""
},
{
"path": "frontend/postcss.config.js",
"chars": 81,
"preview": "export default {\n plugins: {\n tailwindcss: {},\n autoprefixer: {},\n },\n}\n\n"
},
{
"path": "frontend/src/App.tsx",
"chars": 1153,
"preview": "import { Routes, Route } from 'react-router-dom'\nimport { Toaster } from 'sonner'\nimport MainLayout from './layout/MainL"
},
{
"path": "frontend/src/components/DebateChatRoom.tsx",
"chars": 20137,
"preview": "import React, { useState, useRef, useEffect, useCallback } from 'react'\nimport { \n Send, User, TrendingUp, TrendingDown"
},
{
"path": "frontend/src/components/DebateConfig.tsx",
"chars": 13508,
"preview": "/**\n * 辩论模式配置组件\n * 支持选择不同的多智能体协作模式\n */\nimport React, { useState, useEffect } from 'react'\nimport {\n Settings,\n Zap,\n "
},
{
"path": "frontend/src/components/DebateHistorySidebar.tsx",
"chars": 15143,
"preview": "import React, { useState, useMemo } from 'react'\nimport { \n History, \n Trash2, \n MessageSquare, \n Clock, \n PlayCirc"
},
{
"path": "frontend/src/components/HighlightText.tsx",
"chars": 1572,
"preview": "import React from 'react'\n\ninterface HighlightTextProps {\n text: string\n highlight: string\n className?: string\n}\n\n/**"
},
{
"path": "frontend/src/components/KLineChart.tsx",
"chars": 12226,
"preview": "/**\n * KLineChart 组件\n * 使用 klinecharts 库展示专业的 K 线图\n * 支持:蜡烛图、成交量、MA均线、MACD等\n */\nimport { useEffect, useRef, useCallback,"
},
{
"path": "frontend/src/components/MentionInput.tsx",
"chars": 13330,
"preview": "import React, { useState, useRef, useEffect, useCallback, useMemo } from 'react'\nimport { \n TrendingUp, \n TrendingDown"
},
{
"path": "frontend/src/components/ModelSelector.tsx",
"chars": 8761,
"preview": "import { useState, useEffect, useMemo } from 'react'\nimport { useQuery } from '@tanstack/react-query'\nimport { Button } "
},
{
"path": "frontend/src/components/NewsDetailDrawer.tsx",
"chars": 24245,
"preview": "import { useQuery } from '@tanstack/react-query'\nimport { useState, useEffect } from 'react'\nimport { toast } from 'sonn"
},
{
"path": "frontend/src/components/StockSearch.tsx",
"chars": 9370,
"preview": "/**\n * 股票搜索组件\n * 支持代码和名称模糊搜索\n */\nimport { useState, useCallback, useRef, useEffect } from 'react'\nimport { useQuery, use"
},
{
"path": "frontend/src/components/alpha-mining/AgentDemo.tsx",
"chars": 13966,
"preview": "/**\n * AgenticX Agent 调用演示组件\n * \n * 展示如何通过 Agent 接口调用 AlphaMiningTool:\n * - Agent 调用流程可视化\n * - Tool 参数输入面板\n * - 执行日志流式显示"
},
{
"path": "frontend/src/components/alpha-mining/MetricsDashboard.tsx",
"chars": 16498,
"preview": "/**\n * 完整评估指标仪表盘\n * \n * 展示因子评估的所有指标:\n * - 雷达图:多维度指标可视化\n * - 收益曲线:策略收益 vs 基准\n * - 风险指标卡片\n */\n\nimport React from 'react';\n"
},
{
"path": "frontend/src/components/alpha-mining/OperatorGrid.tsx",
"chars": 12115,
"preview": "/**\n * DSL 操作符可视化组件\n * \n * 展示 21 个因子操作符,按类别分组显示\n * 支持点击插入到因子表达式输入框\n */\n\nimport React, { useState } from 'react';\nimport "
},
{
"path": "frontend/src/components/alpha-mining/SentimentCompare.tsx",
"chars": 12950,
"preview": "/**\n * 情感融合效果对比组件\n * \n * 对比纯技术因子 vs 情感增强因子的效果:\n * - 左右两栏对比\n * - 指标对比条形图\n * - 改进幅度高亮\n */\n\nimport React, { useState, useCa"
},
{
"path": "frontend/src/components/alpha-mining/TrainingMonitor.tsx",
"chars": 13497,
"preview": "/**\n * 训练进度实时监控组件\n * \n * 使用 SSE 订阅训练进度,实时显示:\n * - 训练步数/进度\n * - Loss/Reward 曲线\n * - 当前最优因子表达式\n */\n\nimport React, { useSta"
},
{
"path": "frontend/src/components/alpha-mining/index.ts",
"chars": 473,
"preview": "/**\n * Alpha Mining 组件导出\n */\n\nexport { default as OperatorGrid, FEATURES } from './OperatorGrid';\nexport type { Operator"
},
{
"path": "frontend/src/components/ui/badge.tsx",
"chars": 1349,
"preview": "import * as React from \"react\"\nimport { cva, type VariantProps } from \"class-variance-authority\"\n\nimport { cn } from \"@/"
},
{
"path": "frontend/src/components/ui/button.tsx",
"chars": 1902,
"preview": "import * as React from \"react\"\nimport { Slot } from \"@radix-ui/react-slot\"\nimport { cva, type VariantProps } from \"class"
},
{
"path": "frontend/src/components/ui/card.tsx",
"chars": 1878,
"preview": "import * as React from \"react\"\n\nimport { cn } from \"@/lib/utils\"\n\nconst Card = React.forwardRef<\n HTMLDivElement,\n Rea"
},
{
"path": "frontend/src/components/ui/dropdown-menu.tsx",
"chars": 7303,
"preview": "import * as React from \"react\"\nimport * as DropdownMenuPrimitive from \"@radix-ui/react-dropdown-menu\"\nimport { Check, Ch"
},
{
"path": "frontend/src/components/ui/sheet.tsx",
"chars": 4892,
"preview": "import * as React from \"react\"\nimport { X } from \"lucide-react\"\nimport { cn } from \"@/lib/utils\"\n\ninterface SheetContext"
},
{
"path": "frontend/src/components/ui/tabs.tsx",
"chars": 1882,
"preview": "import * as React from \"react\"\nimport * as TabsPrimitive from \"@radix-ui/react-tabs\"\nimport { cn } from \"@/lib/utils\"\n\nc"
},
{
"path": "frontend/src/context/NewsToolbarContext.tsx",
"chars": 745,
"preview": "import React, { createContext, useContext, useState } from 'react'\n\ninterface ToolbarContent {\n left?: React.ReactNode "
},
{
"path": "frontend/src/hooks/useDebounce.ts",
"chars": 870,
"preview": "import { useState, useEffect } from 'react'\n\n/**\n * useDebounce Hook\n * \n * 用于延迟处理快速变化的值(如搜索输入),避免频繁触发计算或API请求\n * \n * @p"
},
{
"path": "frontend/src/index.css",
"chars": 1568,
"preview": "@tailwind base;\n@tailwind components;\n@tailwind utilities;\n\n@layer base {\n :root {\n --background: 0 0% 100%;\n --f"
},
{
"path": "frontend/src/layout/MainLayout.tsx",
"chars": 4599,
"preview": "import { Outlet, Link, useLocation } from 'react-router-dom'\nimport { Home, Newspaper, TrendingUp, Activity, Settings, B"
},
{
"path": "frontend/src/lib/api-client.ts",
"chars": 28228,
"preview": "import axios from 'axios'\nimport type {\n News,\n Analysis,\n CrawlTask,\n TaskStats,\n CrawlRequest,\n CrawlResponse,\n "
},
{
"path": "frontend/src/lib/utils.ts",
"chars": 1305,
"preview": "import { type ClassValue, clsx } from \"clsx\"\nimport { twMerge } from \"tailwind-merge\"\n\nexport function cn(...inputs: Cla"
},
{
"path": "frontend/src/main.tsx",
"chars": 667,
"preview": "import React from 'react'\nimport ReactDOM from 'react-dom/client'\nimport { BrowserRouter } from 'react-router-dom'\nimpor"
},
{
"path": "frontend/src/pages/AgentMonitorPage.tsx",
"chars": 22423,
"preview": "import { useState, useEffect } from 'react'\nimport { useQuery, useMutation, useQueryClient } from '@tanstack/react-query"
},
{
"path": "frontend/src/pages/AlphaMiningPage.tsx",
"chars": 15988,
"preview": "/**\n * Alpha Mining 因子挖掘页面(增强版)\n * \n * 技术亮点展示:\n * - 符号回归 + RL: Transformer 策略网络 + REINFORCE 算法\n * - DSL 系统: 21 个时序/算术/条件"
},
{
"path": "frontend/src/pages/Dashboard.tsx",
"chars": 13845,
"preview": "import { useQuery } from '@tanstack/react-query'\nimport { Card, CardContent, CardDescription, CardHeader, CardTitle } fr"
},
{
"path": "frontend/src/pages/NewsListPage.tsx",
"chars": 40474,
"preview": "import { useState, useEffect, useMemo, useRef, useCallback } from 'react'\nimport { useQuery, useMutation, useQueryClient"
},
{
"path": "frontend/src/pages/StockAnalysisPage.tsx",
"chars": 105517,
"preview": "import { useState, useEffect, useMemo, useRef, useCallback } from 'react'\nimport { useParams, useNavigate } from 'react-"
},
{
"path": "frontend/src/pages/StockSearchPage.tsx",
"chars": 14763,
"preview": "/**\n * 股票搜索入口页面\n * 风格参考 Manus/ChatGPT 的对话入口\n */\nimport { useState, useCallback, useRef, useEffect } from 'react'\nimport "
},
{
"path": "frontend/src/pages/TaskManagerPage.tsx",
"chars": 4169,
"preview": "import { useQuery } from '@tanstack/react-query'\nimport { Card, CardContent, CardHeader, CardTitle } from '@/components/"
}
]
// ... and 93 more files (download for full content)
About this extraction
This page contains the full source code of the DemonDamon/Listed-company-news-crawl-and-text-analysis GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 293 files (2.5 MB), approximately 671.7k tokens, and a symbol index with 1422 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.