Showing preview only (4,908K chars total). Download the full file or copy to clipboard to get everything.
Repository: microsoft/RD-Agent
Branch: main
Commit: 471eb30d1ec0
Files: 835
Total size: 4.5 MB
Directory structure:
gitextract_5wquvenf/
├── .bumpversion.cfg
├── .commitlintrc.js
├── .devcontainer/
│ ├── Dockerfile
│ ├── README.md
│ ├── devcontainer.json
│ └── env
├── .github/
│ ├── FUNDING.yml
│ ├── ISSUE_TEMPLATE/
│ │ ├── bug-report.md
│ │ ├── documentation.md
│ │ ├── feature-request.md
│ │ └── question.md
│ ├── PULL_REQUEST_TEMPLATE.md
│ ├── dependabot.yml
│ └── workflows/
│ ├── ci.yml
│ ├── pr.yml
│ ├── readthedocs-preview.yml
│ └── release.yml
├── .gitignore
├── .readthedocs.yaml
├── .streamlit/
│ └── config.toml
├── CHANGELOG.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── Makefile
├── README.md
├── SECURITY.md
├── SUPPORT.md
├── TODO.md
├── constraints/
│ ├── 3.10.txt
│ └── 3.11.txt
├── docs/
│ ├── Makefile
│ ├── _static/
│ │ └── RD2bench.json
│ ├── api_reference.rst
│ ├── changelog.md
│ ├── conf.py
│ ├── development.rst
│ ├── index.rst
│ ├── installation_and_configuration.rst
│ ├── introduction.rst
│ ├── make.bat
│ ├── policy.rst
│ ├── project_framework_introduction.rst
│ ├── requirements.txt
│ ├── research/
│ │ ├── benchmark.rst
│ │ ├── catalog.rst
│ │ └── dev.rst
│ ├── scens/
│ │ ├── catalog.rst
│ │ ├── data_agent_fin.rst
│ │ ├── data_copilot_fin.rst
│ │ ├── data_science.rst
│ │ ├── finetune.rst
│ │ ├── model_agent_fin.rst
│ │ ├── model_copilot_general.rst
│ │ └── quant_agent_fin.rst
│ └── ui.rst
├── pyproject.toml
├── rdagent/
│ ├── app/
│ │ ├── CI/
│ │ │ ├── README.md
│ │ │ ├── ci.ipynb
│ │ │ ├── prompts.yaml
│ │ │ └── run.py
│ │ ├── benchmark/
│ │ │ ├── factor/
│ │ │ │ ├── analysis.py
│ │ │ │ └── eval.py
│ │ │ └── model/
│ │ │ ├── README.md
│ │ │ └── eval.py
│ │ ├── cli.py
│ │ ├── data_science/
│ │ │ ├── conf.py
│ │ │ ├── debug.py
│ │ │ └── loop.py
│ │ ├── finetune/
│ │ │ ├── data_science/
│ │ │ │ ├── conf.py
│ │ │ │ ├── loop.py
│ │ │ │ ├── scen.py
│ │ │ │ └── tpl/
│ │ │ │ ├── components/
│ │ │ │ │ └── coder/
│ │ │ │ │ └── data_science/
│ │ │ │ │ └── pipeline/
│ │ │ │ │ └── prompts.yaml
│ │ │ │ └── scenarios/
│ │ │ │ └── data_science/
│ │ │ │ └── proposal/
│ │ │ │ └── exp_gen/
│ │ │ │ └── prompts_v2.yaml
│ │ │ ├── llm/
│ │ │ │ ├── README.md
│ │ │ │ ├── conf.py
│ │ │ │ ├── job/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── run_ft_job.sh
│ │ │ │ │ ├── scenarios.json
│ │ │ │ │ └── tasks.json.example
│ │ │ │ ├── loop.py
│ │ │ │ └── ui/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── app.py
│ │ │ │ ├── benchmarks/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── base.py
│ │ │ │ │ ├── bioprobench.py
│ │ │ │ │ ├── chemcotbench.py
│ │ │ │ │ ├── financeiq.py
│ │ │ │ │ ├── panorama.py
│ │ │ │ │ └── tablebench.py
│ │ │ │ ├── components.py
│ │ │ │ ├── config.py
│ │ │ │ ├── data_loader.py
│ │ │ │ └── ft_summary.py
│ │ │ └── share/
│ │ │ ├── eval.py
│ │ │ └── prompts.yaml
│ │ ├── general_model/
│ │ │ └── general_model.py
│ │ ├── kaggle/
│ │ │ ├── conf.py
│ │ │ └── loop.py
│ │ ├── qlib_rd_loop/
│ │ │ ├── conf.py
│ │ │ ├── factor.py
│ │ │ ├── factor_from_report.py
│ │ │ ├── model.py
│ │ │ ├── prompts.yaml
│ │ │ └── quant.py
│ │ ├── rl/
│ │ │ ├── conf.py
│ │ │ ├── loop.py
│ │ │ └── ui/
│ │ │ ├── __init__.py
│ │ │ ├── app.py
│ │ │ ├── components.py
│ │ │ ├── config.py
│ │ │ ├── data_loader.py
│ │ │ └── rl_summary.py
│ │ └── utils/
│ │ ├── ape.py
│ │ ├── health_check.py
│ │ ├── info.py
│ │ ├── prompts.yaml
│ │ ├── ws.py
│ │ └── ws_ft.py
│ ├── components/
│ │ ├── agent/
│ │ │ ├── __init__.py
│ │ │ ├── base.py
│ │ │ ├── context7/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── conf.py
│ │ │ │ └── prompts.yaml
│ │ │ ├── mcp/
│ │ │ │ └── __init__.py
│ │ │ └── rag/
│ │ │ ├── __init__.py
│ │ │ └── conf.py
│ │ ├── benchmark/
│ │ │ ├── __init__.py
│ │ │ ├── conf.py
│ │ │ ├── configs/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── models.yaml
│ │ │ │ └── opencompass_template.yaml
│ │ │ ├── eval_method.py
│ │ │ ├── example.json
│ │ │ └── utils.py
│ │ ├── coder/
│ │ │ ├── CoSTEER/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── config.py
│ │ │ │ ├── evaluators.py
│ │ │ │ ├── evolvable_subjects.py
│ │ │ │ ├── evolving_strategy.py
│ │ │ │ ├── knowledge_management.py
│ │ │ │ ├── prompts.yaml
│ │ │ │ └── task.py
│ │ │ ├── data_science/
│ │ │ │ ├── conf.py
│ │ │ │ ├── ensemble/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── conf.py
│ │ │ │ │ ├── eval.py
│ │ │ │ │ ├── eval_tests/
│ │ │ │ │ │ └── ensemble_test.txt
│ │ │ │ │ ├── exp.py
│ │ │ │ │ ├── prompts.yaml
│ │ │ │ │ └── test.py
│ │ │ │ ├── feature/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── eval.py
│ │ │ │ │ ├── eval_tests/
│ │ │ │ │ │ └── feature_test.txt
│ │ │ │ │ ├── exp.py
│ │ │ │ │ ├── prompts.yaml
│ │ │ │ │ └── test.py
│ │ │ │ ├── model/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── eval.py
│ │ │ │ │ ├── eval_tests/
│ │ │ │ │ │ └── model_test.txt
│ │ │ │ │ ├── exp.py
│ │ │ │ │ ├── prompts.yaml
│ │ │ │ │ └── test.py
│ │ │ │ ├── pipeline/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── eval.py
│ │ │ │ │ ├── eval_tests/
│ │ │ │ │ │ └── submission_format_test.txt
│ │ │ │ │ ├── exp.py
│ │ │ │ │ └── prompts.yaml
│ │ │ │ ├── raw_data_loader/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── conf.py
│ │ │ │ │ ├── eval.py
│ │ │ │ │ ├── eval_tests/
│ │ │ │ │ │ └── data_loader_test.txt
│ │ │ │ │ ├── exp.py
│ │ │ │ │ ├── prompts.yaml
│ │ │ │ │ └── test.py
│ │ │ │ ├── share/
│ │ │ │ │ ├── doc.py
│ │ │ │ │ ├── ds_costeer.py
│ │ │ │ │ ├── eval.py
│ │ │ │ │ ├── notebook.py
│ │ │ │ │ ├── prompts.yaml
│ │ │ │ │ └── util.py
│ │ │ │ ├── utils.py
│ │ │ │ └── workflow/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── eval.py
│ │ │ │ ├── eval_tests/
│ │ │ │ │ └── submission_format_test.txt
│ │ │ │ ├── exp.py
│ │ │ │ ├── prompts.yaml
│ │ │ │ └── test.py
│ │ │ ├── factor_coder/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── config.py
│ │ │ │ ├── eva_utils.py
│ │ │ │ ├── evaluators.py
│ │ │ │ ├── evolving_strategy.py
│ │ │ │ ├── factor.py
│ │ │ │ ├── factor_execution_template.txt
│ │ │ │ └── prompts.yaml
│ │ │ ├── finetune/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── conf.py
│ │ │ │ ├── eval.py
│ │ │ │ ├── exp.py
│ │ │ │ ├── prompts.yaml
│ │ │ │ └── unified_validator.py
│ │ │ ├── model_coder/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── benchmark/
│ │ │ │ │ ├── eval.py
│ │ │ │ │ ├── gt_code/
│ │ │ │ │ │ ├── A-DGN.py
│ │ │ │ │ │ ├── dirgnn.py
│ │ │ │ │ │ ├── gpsconv.py
│ │ │ │ │ │ ├── linkx.py
│ │ │ │ │ │ ├── pmlp.py
│ │ │ │ │ │ └── visnet.py
│ │ │ │ │ └── model_dict.json
│ │ │ │ ├── conf.py
│ │ │ │ ├── eva_utils.py
│ │ │ │ ├── evaluators.py
│ │ │ │ ├── evolving_strategy.py
│ │ │ │ ├── gt_code.py
│ │ │ │ ├── model.py
│ │ │ │ ├── model_execute_template_v1.txt
│ │ │ │ ├── model_execute_template_v2.txt
│ │ │ │ ├── one_shot/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── prompt.yaml
│ │ │ │ ├── prompts.yaml
│ │ │ │ └── task_loader.py
│ │ │ └── rl/
│ │ │ ├── __init__.py
│ │ │ ├── costeer.py
│ │ │ └── prompts.yaml
│ │ ├── document_reader/
│ │ │ └── document_reader.py
│ │ ├── interactor/
│ │ │ └── __init__.py
│ │ ├── knowledge_management/
│ │ │ ├── graph.py
│ │ │ └── vector_base.py
│ │ ├── loader/
│ │ │ ├── experiment_loader.py
│ │ │ └── task_loader.py
│ │ ├── proposal/
│ │ │ ├── __init__.py
│ │ │ └── prompts.yaml
│ │ ├── runner/
│ │ │ └── __init__.py
│ │ └── workflow/
│ │ ├── conf.py
│ │ └── rd_loop.py
│ ├── core/
│ │ ├── conf.py
│ │ ├── developer.py
│ │ ├── evaluation.py
│ │ ├── evolving_agent.py
│ │ ├── evolving_framework.py
│ │ ├── exception.py
│ │ ├── experiment.py
│ │ ├── interactor.py
│ │ ├── knowledge_base.py
│ │ ├── prompts.py
│ │ ├── proposal.py
│ │ ├── scenario.py
│ │ └── utils.py
│ ├── log/
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── conf.py
│ │ ├── logger.py
│ │ ├── mle_summary.py
│ │ ├── server/
│ │ │ ├── README.md
│ │ │ ├── app.py
│ │ │ └── debug_app.py
│ │ ├── storage.py
│ │ ├── timer.py
│ │ ├── ui/
│ │ │ ├── __init__.py
│ │ │ ├── aide.py
│ │ │ ├── app.py
│ │ │ ├── conf.py
│ │ │ ├── ds_summary.py
│ │ │ ├── ds_trace.py
│ │ │ ├── ds_user_interact.py
│ │ │ ├── dsapp.py
│ │ │ ├── llm_st.py
│ │ │ ├── qlib_report_figure.py
│ │ │ ├── st_fixed_container.py
│ │ │ ├── storage.py
│ │ │ ├── utils.py
│ │ │ └── web.py
│ │ └── utils/
│ │ ├── __init__.py
│ │ └── folder.py
│ ├── oai/
│ │ ├── backend/
│ │ │ ├── __init__.py
│ │ │ ├── base.py
│ │ │ ├── deprec.py
│ │ │ ├── litellm.py
│ │ │ └── pydantic_ai.py
│ │ ├── llm_conf.py
│ │ ├── llm_utils.py
│ │ └── utils/
│ │ └── embedding.py
│ ├── scenarios/
│ │ ├── data_science/
│ │ │ ├── __init__.py
│ │ │ ├── debug/
│ │ │ │ └── data.py
│ │ │ ├── dev/
│ │ │ │ ├── feedback.py
│ │ │ │ ├── prompts.yaml
│ │ │ │ └── runner/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── eval.py
│ │ │ │ └── prompts.yaml
│ │ │ ├── eval_tests/
│ │ │ │ └── mle_submission_format_test.txt
│ │ │ ├── example/
│ │ │ │ ├── README.md
│ │ │ │ ├── arf-12-hours-prediction-task/
│ │ │ │ │ ├── description.md
│ │ │ │ │ └── sample.py
│ │ │ │ ├── eval/
│ │ │ │ │ ├── arf-12-hours-prediction-task/
│ │ │ │ │ │ ├── grade.py
│ │ │ │ │ │ └── valid.py
│ │ │ │ │ └── playground-series-s4e9/
│ │ │ │ │ ├── grade.py
│ │ │ │ │ └── valid.py
│ │ │ │ ├── playground-series-s4e9/
│ │ │ │ │ └── description.md
│ │ │ │ └── source_data/
│ │ │ │ ├── arf-12-hours-prediction-task/
│ │ │ │ │ └── prepare.py
│ │ │ │ └── playground-series-s4e9/
│ │ │ │ └── prepare.py
│ │ │ ├── experiment/
│ │ │ │ ├── __init__.py
│ │ │ │ └── experiment.py
│ │ │ ├── interactor/
│ │ │ │ └── __init__.py
│ │ │ ├── loop.py
│ │ │ ├── proposal/
│ │ │ │ ├── __init__.py
│ │ │ │ └── exp_gen/
│ │ │ │ ├── README.md
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base.py
│ │ │ │ ├── diversity_strategy.py
│ │ │ │ ├── draft/
│ │ │ │ │ ├── draft.py
│ │ │ │ │ └── prompts_draft.yaml
│ │ │ │ ├── idea_pool.py
│ │ │ │ ├── merge.py
│ │ │ │ ├── merge.yaml
│ │ │ │ ├── naive.py
│ │ │ │ ├── naive.yaml
│ │ │ │ ├── package_info.py
│ │ │ │ ├── planner/
│ │ │ │ │ └── __init__.py
│ │ │ │ ├── prompts.yaml
│ │ │ │ ├── prompts_v2.yaml
│ │ │ │ ├── proposal.py
│ │ │ │ ├── router/
│ │ │ │ │ └── __init__.py
│ │ │ │ ├── select/
│ │ │ │ │ ├── expand.py
│ │ │ │ │ ├── prompts.yaml
│ │ │ │ │ └── submit.py
│ │ │ │ ├── trace_scheduler.py
│ │ │ │ └── utils.py
│ │ │ ├── scen/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── prompts.yaml
│ │ │ │ └── utils.py
│ │ │ ├── share.yaml
│ │ │ ├── sing_docker/
│ │ │ │ ├── Dockerfile
│ │ │ │ ├── entrypoint.sh
│ │ │ │ └── kaggle_environment.yaml
│ │ │ └── test_eval.py
│ │ ├── finetune/
│ │ │ ├── benchmark/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── benchmark.py
│ │ │ │ ├── configs/
│ │ │ │ │ ├── models.yaml
│ │ │ │ │ └── opencompass_template.yaml
│ │ │ │ ├── data/
│ │ │ │ │ ├── adaptor.py
│ │ │ │ │ ├── default.py
│ │ │ │ │ └── financeiq_gen.py
│ │ │ │ └── merge/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── merge.py
│ │ │ │ └── merge_model_template.yaml
│ │ │ ├── datasets/
│ │ │ │ ├── README.md
│ │ │ │ ├── __init__.py
│ │ │ │ ├── bioprobench/
│ │ │ │ │ └── README.md
│ │ │ │ ├── chemcot/
│ │ │ │ │ ├── README.md
│ │ │ │ │ └── __init__.py
│ │ │ │ ├── deepscaler/
│ │ │ │ │ └── README.md
│ │ │ │ ├── financeiq/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── split.py
│ │ │ │ ├── panorama/
│ │ │ │ │ └── README.md
│ │ │ │ └── tableinstruct/
│ │ │ │ └── README.md
│ │ │ ├── dev/
│ │ │ │ ├── feedback.py
│ │ │ │ └── prompts.yaml
│ │ │ ├── download/
│ │ │ │ ├── __init__.py
│ │ │ │ └── hf.py
│ │ │ ├── env/
│ │ │ │ ├── conda/
│ │ │ │ │ ├── deepspeed/
│ │ │ │ │ │ ├── ds_z2_config.json
│ │ │ │ │ │ └── ds_z3_config.json
│ │ │ │ │ ├── llm_finetune_requirements.txt
│ │ │ │ │ └── opencompass_requirements.txt
│ │ │ │ └── docker/
│ │ │ │ ├── llm_finetune/
│ │ │ │ │ └── Dockerfile
│ │ │ │ └── opencompass/
│ │ │ │ └── Dockerfile
│ │ │ ├── experiment/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── experiment.py
│ │ │ │ └── workspace.py
│ │ │ ├── loop.py
│ │ │ ├── proposal/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── prompts.yaml
│ │ │ │ ├── proposal.py
│ │ │ │ └── trace.py
│ │ │ ├── scen/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── docker_scripts/
│ │ │ │ │ └── extract_parameters.py
│ │ │ │ ├── llama_factory_manager.py
│ │ │ │ ├── memory_estimator.py
│ │ │ │ ├── prompts.yaml
│ │ │ │ ├── scenario.py
│ │ │ │ └── utils.py
│ │ │ ├── share.yaml
│ │ │ ├── train/
│ │ │ │ ├── eval.py
│ │ │ │ └── runner.py
│ │ │ └── utils.py
│ │ ├── general_model/
│ │ │ ├── prompts.yaml
│ │ │ └── scenario.py
│ │ ├── kaggle/
│ │ │ ├── README.md
│ │ │ ├── developer/
│ │ │ │ ├── coder.py
│ │ │ │ ├── feedback.py
│ │ │ │ └── runner.py
│ │ │ ├── docker/
│ │ │ │ ├── DS_docker/
│ │ │ │ │ └── Dockerfile
│ │ │ │ ├── kaggle_docker/
│ │ │ │ │ └── Dockerfile
│ │ │ │ └── mle_bench_docker/
│ │ │ │ └── Dockerfile
│ │ │ ├── experiment/
│ │ │ │ ├── README.md
│ │ │ │ ├── kaggle_experiment.py
│ │ │ │ ├── prompts.yaml
│ │ │ │ ├── scenario.py
│ │ │ │ ├── spaceship-titanic_template/
│ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ ├── feature/
│ │ │ │ │ │ └── feature.py
│ │ │ │ │ ├── model/
│ │ │ │ │ │ ├── model_randomforest.py
│ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ ├── select_nn.py
│ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ └── train.py
│ │ │ │ ├── templates/
│ │ │ │ │ ├── covid19-global-forecasting-week-1/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── digit-recognizer/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_nn.py
│ │ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ │ ├── select_nn.py
│ │ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── feedback-prize-english-language-learning/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_randomforest.py
│ │ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── forest-cover-type-prediction/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_randomforest.py
│ │ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ │ ├── select_nn.py
│ │ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ ├── train.py
│ │ │ │ │ │ └── train_past.py
│ │ │ │ │ ├── meta_tpl_deprecated/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_nn.py
│ │ │ │ │ │ │ ├── model_randomforest.py
│ │ │ │ │ │ │ └── model_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── new-york-city-taxi-fare-prediction/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_linear.py
│ │ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ │ ├── select_linear.py
│ │ │ │ │ │ │ ├── select_nn.py
│ │ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── optiver-realized-volatility-prediction/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_randomforest.py
│ │ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ │ ├── select_nn.py
│ │ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── playground-series-s3e11/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_randomforest.py
│ │ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ │ ├── select_nn.py
│ │ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── playground-series-s3e14/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_randomforest.py
│ │ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ │ ├── select_nn.py
│ │ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── playground-series-s3e16/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_randomforest.py
│ │ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ │ ├── select_nn.py
│ │ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── playground-series-s3e26/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_randomforest.py
│ │ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ │ ├── select_nn.py
│ │ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── playground-series-s4e5/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_randomforest.py
│ │ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ │ ├── select_nn.py
│ │ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── playground-series-s4e8/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_randomforest.py
│ │ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ │ ├── select_nn.py
│ │ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── playground-series-s4e9/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_randomforest.py
│ │ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ │ ├── select_nn.py
│ │ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── sf-crime/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_randomforest.py
│ │ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ │ ├── select_nn.py
│ │ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── spaceship-titanic/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_randomforest.py
│ │ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ │ ├── select_nn.py
│ │ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── statoil-iceberg-classifier-challenge/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── tabular-playground-series-dec-2021/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_randomforest.py
│ │ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ │ ├── select_nn.py
│ │ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── tabular-playground-series-may-2022/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_randomforest.py
│ │ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ │ ├── select_nn.py
│ │ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ └── ventilator-pressure-prediction/
│ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ ├── feature/
│ │ │ │ │ │ └── feature.py
│ │ │ │ │ ├── model/
│ │ │ │ │ │ ├── model_randomforest.py
│ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ ├── select_nn.py
│ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ └── train.py
│ │ │ │ ├── utils.py
│ │ │ │ └── workspace.py
│ │ │ ├── kaggle_crawler.py
│ │ │ ├── knowledge_management/
│ │ │ │ ├── README.md
│ │ │ │ ├── extract_knowledge.py
│ │ │ │ ├── graph.py
│ │ │ │ ├── prompts.yaml
│ │ │ │ └── vector_base.py
│ │ │ ├── prompts.yaml
│ │ │ ├── proposal/
│ │ │ │ └── proposal.py
│ │ │ └── tpl_ex/
│ │ │ ├── aerial-cactus-identification/
│ │ │ │ ├── README.md
│ │ │ │ ├── ensemble.py
│ │ │ │ ├── feature.py
│ │ │ │ ├── load_data.py
│ │ │ │ ├── main.py
│ │ │ │ ├── model01.py
│ │ │ │ └── spec/
│ │ │ │ ├── data_loader.md
│ │ │ │ ├── ensemble.md
│ │ │ │ ├── feature.md
│ │ │ │ ├── model.md
│ │ │ │ └── workflow.md
│ │ │ └── meta/
│ │ │ └── spec.md
│ │ ├── qlib/
│ │ │ ├── developer/
│ │ │ │ ├── factor_coder.py
│ │ │ │ ├── factor_runner.py
│ │ │ │ ├── feedback.py
│ │ │ │ ├── model_coder.py
│ │ │ │ ├── model_runner.py
│ │ │ │ └── utils.py
│ │ │ ├── docker/
│ │ │ │ └── Dockerfile
│ │ │ ├── experiment/
│ │ │ │ ├── factor_data_template/
│ │ │ │ │ ├── README.md
│ │ │ │ │ └── generate.py
│ │ │ │ ├── factor_experiment.py
│ │ │ │ ├── factor_from_report_experiment.py
│ │ │ │ ├── factor_template/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── conf_baseline.yaml
│ │ │ │ │ ├── conf_combined_factors.yaml
│ │ │ │ │ ├── conf_combined_factors_sota_model.yaml
│ │ │ │ │ └── read_exp_res.py
│ │ │ │ ├── model_experiment.py
│ │ │ │ ├── model_template/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── conf_baseline_factors_model.yaml
│ │ │ │ │ ├── conf_sota_factors_model.yaml
│ │ │ │ │ └── read_exp_res.py
│ │ │ │ ├── prompts.yaml
│ │ │ │ ├── quant_experiment.py
│ │ │ │ ├── utils.py
│ │ │ │ └── workspace.py
│ │ │ ├── factor_experiment_loader/
│ │ │ │ ├── json_loader.py
│ │ │ │ ├── pdf_loader.py
│ │ │ │ └── prompts.yaml
│ │ │ ├── prompts.yaml
│ │ │ └── proposal/
│ │ │ ├── bandit.py
│ │ │ ├── factor_proposal.py
│ │ │ ├── model_proposal.py
│ │ │ └── quant_proposal.py
│ │ ├── rl/
│ │ │ ├── autorl_bench/
│ │ │ │ ├── .gitignore
│ │ │ │ ├── README.md
│ │ │ │ ├── __init__.py
│ │ │ │ ├── agents/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── claude/
│ │ │ │ │ │ ├── AGENTS.md
│ │ │ │ │ │ ├── config.yaml
│ │ │ │ │ │ └── start.sh
│ │ │ │ │ ├── codex/
│ │ │ │ │ │ ├── AGENTS.md
│ │ │ │ │ │ ├── config.yaml
│ │ │ │ │ │ └── start.sh
│ │ │ │ │ ├── example_agent/
│ │ │ │ │ │ ├── config.yaml
│ │ │ │ │ │ ├── start.sh
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── gemini/
│ │ │ │ │ │ ├── AGENTS.md
│ │ │ │ │ │ ├── config.yaml
│ │ │ │ │ │ └── start.sh
│ │ │ │ │ ├── opencode/
│ │ │ │ │ │ ├── config.yaml
│ │ │ │ │ │ └── start.sh
│ │ │ │ │ ├── openhands/
│ │ │ │ │ │ ├── config.yaml
│ │ │ │ │ │ └── start.sh
│ │ │ │ │ ├── rdagent/
│ │ │ │ │ │ ├── config.yaml
│ │ │ │ │ │ └── start.sh
│ │ │ │ │ └── registry.py
│ │ │ │ ├── benchmarks/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── alfworld/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── base_config.yaml
│ │ │ │ │ │ ├── data.py
│ │ │ │ │ │ ├── description.md
│ │ │ │ │ │ ├── eval.py
│ │ │ │ │ │ ├── react_prompts.json
│ │ │ │ │ │ └── requirements.txt
│ │ │ │ │ ├── alpacaeval/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── annotators_gpt52_fn/
│ │ │ │ │ │ │ ├── alpaca_eval_fn.txt
│ │ │ │ │ │ │ ├── annotations_seed0_configs.json
│ │ │ │ │ │ │ └── configs.yaml
│ │ │ │ │ │ ├── data.py
│ │ │ │ │ │ ├── description.md
│ │ │ │ │ │ ├── eval.py
│ │ │ │ │ │ └── requirements.txt
│ │ │ │ │ ├── deepsearchqa/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── data.py
│ │ │ │ │ │ ├── description.md
│ │ │ │ │ │ └── eval.py
│ │ │ │ │ ├── gsm8k/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── data.py
│ │ │ │ │ │ └── description.md
│ │ │ │ │ ├── humaneval/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── data.py
│ │ │ │ │ │ ├── description.md
│ │ │ │ │ │ └── requirements.txt
│ │ │ │ │ ├── smith/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ └── per_sample_eval.py
│ │ │ │ │ └── webshop/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── data.py
│ │ │ │ │ ├── description.md
│ │ │ │ │ ├── eval.py
│ │ │ │ │ └── requirements.txt
│ │ │ │ ├── conf.py
│ │ │ │ ├── core/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── evaluator.py
│ │ │ │ │ ├── instructions.md
│ │ │ │ │ ├── metrics.py
│ │ │ │ │ ├── opencompass.py
│ │ │ │ │ ├── server.py
│ │ │ │ │ ├── skill.md
│ │ │ │ │ ├── ui.py
│ │ │ │ │ └── utils.py
│ │ │ │ ├── requirements.txt
│ │ │ │ ├── run.py
│ │ │ │ └── test/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── test_benchmark.py
│ │ │ │ └── test_fixes.py
│ │ │ ├── dev/
│ │ │ │ ├── feedback.py
│ │ │ │ └── prompts.yaml
│ │ │ ├── env/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── conf.py
│ │ │ │ └── docker/
│ │ │ │ ├── base/
│ │ │ │ │ └── Dockerfile
│ │ │ │ ├── evalplus/
│ │ │ │ │ └── Dockerfile
│ │ │ │ ├── gsm8k/
│ │ │ │ │ └── Dockerfile
│ │ │ │ └── miniwob/
│ │ │ │ └── Dockerfile
│ │ │ ├── experiment/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── experiment.py
│ │ │ │ └── workspace.py
│ │ │ ├── loop.py
│ │ │ ├── proposal/
│ │ │ │ ├── prompts.yaml
│ │ │ │ ├── proposal.py
│ │ │ │ └── trace.py
│ │ │ ├── scen/
│ │ │ │ └── scenario.py
│ │ │ └── train/
│ │ │ └── runner.py
│ │ └── shared/
│ │ ├── get_runtime_info.py
│ │ └── runtime_info.py
│ └── utils/
│ ├── __init__.py
│ ├── agent/
│ │ ├── __init__.py
│ │ ├── apply_patch.py
│ │ ├── ret.py
│ │ ├── tpl.py
│ │ ├── tpl.yaml
│ │ └── workflow.py
│ ├── blob/
│ │ ├── azsync.sh
│ │ └── gen_token.sh
│ ├── env.py
│ ├── fmt.py
│ ├── prompts.yaml
│ ├── qlib.py
│ ├── repo/
│ │ ├── README.md
│ │ ├── diff.py
│ │ └── repo_utils.py
│ └── workflow/
│ ├── __init__.py
│ ├── loop.py
│ ├── misc.py
│ └── tracking.py
├── requirements/
│ ├── docs.txt
│ ├── lint.txt
│ ├── package.txt
│ ├── test.txt
│ └── torch.txt
├── requirements.txt
├── test/
│ ├── finetune/
│ │ ├── test_benchmark.py
│ │ ├── test_benchmark_api.py
│ │ └── test_benchmark_tablebench.py
│ ├── notebook/
│ │ ├── test_notebook_converter.py
│ │ ├── test_util.py
│ │ └── testfiles/
│ │ ├── main.ipynb
│ │ ├── main.py
│ │ ├── main2.ipynb
│ │ ├── main2.py
│ │ ├── main_missing_main_fn.py
│ │ └── main_missing_sections.py
│ ├── oai/
│ │ ├── test_advanced.py
│ │ ├── test_base.py
│ │ ├── test_completion.py
│ │ ├── test_embedding_and_similarity.py
│ │ ├── test_llm_connectivity.py
│ │ ├── test_prefect_cache.py
│ │ └── test_pydantic.py
│ ├── qlib/
│ │ └── test_model_factor_proposal.py
│ ├── rl/
│ │ └── __init__.py
│ └── utils/
│ ├── README.md
│ ├── coder/
│ │ ├── test_CoSTEER.py
│ │ └── test_finetune_coder.py
│ ├── test_agent_infra.py
│ ├── test_conf.py
│ ├── test_env.py
│ ├── test_import.py
│ ├── test_kaggle.py
│ ├── test_misc.py
│ └── test_ws.py
└── web/
├── .gitignore
├── README.md
├── auto-imports.d.ts
├── components.d.ts
├── index.html
├── package.json
├── src/
│ ├── App.vue
│ ├── common/
│ │ ├── code-theme.css
│ │ ├── py-theme.css
│ │ └── reset.css
│ ├── components/
│ │ ├── chartBox.vue
│ │ ├── code.vue
│ │ ├── development.vue
│ │ ├── dialog.vue
│ │ ├── feedback.vue
│ │ ├── footer.vue
│ │ ├── kateX.vue
│ │ ├── lineChart.vue
│ │ ├── lineChartOne.vue
│ │ ├── loading-dot.vue
│ │ ├── loading.vue
│ │ ├── loop-component.vue
│ │ ├── markdown.vue
│ │ ├── markdownToHtml.vue
│ │ ├── navBar.vue
│ │ ├── research.vue
│ │ ├── saveImage.vue
│ │ ├── select-component.vue
│ │ ├── sm-select-component.vue
│ │ ├── step-component.vue
│ │ ├── svgIcon.vue
│ │ ├── swiper.vue
│ │ └── upload-progress.vue
│ ├── constants/
│ │ ├── mle-competitions.js
│ │ └── qlib.js
│ ├── main.ts
│ ├── router/
│ │ └── index.ts
│ ├── shims-vue.d.ts
│ ├── style.css
│ ├── utils/
│ │ ├── api.js
│ │ ├── crypto.js
│ │ ├── getAssets.ts
│ │ ├── request.js
│ │ └── snap.svg-min.js
│ ├── views/
│ │ ├── Home.vue
│ │ ├── Login.vue
│ │ ├── Playground.vue
│ │ ├── Playground1.vue
│ │ ├── PlaygroundPage.vue
│ │ └── ResultPage.vue
│ └── vite-env.d.ts
├── tsconfig.json
├── tsconfig.node.json
└── vite.config.ts
================================================
FILE CONTENTS
================================================
================================================
FILE: .bumpversion.cfg
================================================
[bumpversion]
current_version = 0.0.0
commit = True
tag = True
[bumpversion:file:pyproject.toml]
================================================
FILE: .commitlintrc.js
================================================
// commitlint configuration: extends the shared "@commitlint/config-conventional"
// preset and overrides the header-length and commit-type rules below.
module.exports = {
extends: ["@commitlint/config-conventional"],
rules: {
// Configuration Format: [level, applicability, value]
// level: Error level, usually expressed as a number:
// 0 - disable rule
// 1 - Warning (does not prevent commits)
// 2 - Error (will block the commit)
// applicability: whether the rule is applied as written or inverted:
// "always" - the rule must hold
// "never" - the rule is inverted (the condition must NOT hold)
// value: the specific value of the rule, e.g. a maximum length of 100.
// Refs: https://commitlint.js.org/reference/rules-configuration.html
// Error when the commit header is longer than 100 characters.
"header-max-length": [2, "always", 100],
// Error when the commit type is not one of the values listed here.
// NOTE(review): "Release-As" is not a standard Conventional Commits type;
// presumably allowed for release-tooling commits — confirm.
"type-enum": [
2,
"always",
["build", "chore", "ci", "docs", "feat", "fix", "perf", "refactor", "revert", "style", "test", "Release-As"]
]
}
};
================================================
FILE: .devcontainer/Dockerfile
================================================
# Dev-container image: starts from the internal RD-Agent MLE image and adds
# a few command-line tools on top of it.
# 1. Pull down your Azure Container Registry image
FROM rdagentappregistry.azurecr.io/rd-agent-mle:20250623
# 2. (Optional) install any additional tools you need
# e.g. git, bash-completion, etc.
# apt-get is used instead of apt because apt's command-line interface is not
# stable for scripted use; the package index is removed in the same layer so
# it is not baked into the image.
RUN apt-get update && \
    apt-get install -y git bash-completion && \
    rm -rf /var/lib/apt/lists/*
================================================
FILE: .devcontainer/README.md
================================================
# Introduction
!!!!!This dev container is not for public development!!!!!!
!!!!!Please don't use it if you are just a public open-source user.!!!!!!
# Steps to run the dev container (for internal use only)
Prerequisites (these are the reason this dev container is not for public use):
- Make sure you have the `rdagentappregistry.azurecr.io/rd-agent-mle:20250623` image locally & DevContainer is installed in your IDE
- The kaggle dataset is located at `/home/shared/RD-Agent/kaggle`
1. Open the project and select "Open In DevContainer"
2. Set up your Kaggle Key (do not share this; other internal URLs are hardcoded in the config files)
```bash
export KAGGLE_USERNAME=
export KAGGLE_KEY=
```
3. Run: `python rdagent/app/data_science/loop.py --competition nomad2018-predict-transparent-conductors`
# Additional Notes
- Please install and use this Dev Container in VS Code.
- You **must open VS Code remotely and enter the `RD-Agent` directory before running the DevContainer configuration (`.devcontainer/devcontainer.json`)**. Otherwise, the workspace and path mappings will not work as expected.
- To open the DevContainer correctly in VS Code:
1. Remotely connect to the machine and open the `RD-Agent` folder in VS Code.
2. Press `Ctrl+Shift+P` (or `Cmd+Shift+P` on Mac), type and select **"Dev Containers: Reopen in Container"**.
# How to grade your submission in the DevContainer
1. Save your submission file as `./submission.csv`
2. Run evaluation
DS_COMPETITION=<your competition name>
conda run -n mlebench mlebench grade-sample submission.csv $DS_COMPETITION --data-dir /tmp/kaggle/zip_files/
================================================
FILE: .devcontainer/devcontainer.json
================================================
{
"name": "rd-agent-mle DevContainer",
"build": {
"dockerfile": "Dockerfile",
"context": ".."
},
"workspaceFolder": "/workspace/RD-Agent",
"workspaceMount": "source=${localWorkspaceFolder},target=/workspace/RD-Agent,type=bind,consistency=cached",
"remoteUser": "root",
"settings": {
"terminal.integrated.shell.linux": "/bin/bash"
},
"mounts": [
"source=/home/shared/RD-Agent/kaggle,target=/tmp/kaggle,type=bind,consistency=cached,readonly"
],
"extensions": [
"ms-python.python",
"ms-python.vscode-pylance",
"ms-toolsai.jupyter"
],
"runArgs": [
"--init",
"--shm-size=1g",
"--env-file", "${localWorkspaceFolder}/.devcontainer/env",
"--network=host",
"--gpus=all"
],
"postCreateCommand": "make dev"
}
================================================
FILE: .devcontainer/env
================================================
# Global configs:
MAX_RETRY=12000
RETRY_WAIT_SECONDS=5
TIMEOUT_FAIL_LIMIT=100
# litellm
# CHAT_MODEL=gpt-4o
# CHAT_TEMPERATURE=0.7
CHAT_STREAM=False
CHAT_TEMPERATURE=1
CHAT_MODEL=o1-preview
SYSTEM_PROMPT_ROLE=user
BACKEND=rdagent.oai.backend.LiteLLMAPIBackend
OPENAI_API_KEY=sk-1234
OPENAI_API_BASE=http://ep14.213428.xyz:38881
# amc chat model configs:
EMBEDDING_MODEL=text-embedding-ada-002
# Cache Setting (Optional):
DUMP_CHAT_CACHE=True
USE_CHAT_CACHE=False
DUMP_EMBEDDING_CACHE=True
USE_EMBEDDING_CACHE=False
LOG_LLM_CHAT_CONTENT=True
DS_LOCAL_DATA_PATH=/tmp/kaggle
DS_IF_USING_MLE_DATA=True
PICKLE_CACHE_FOLDER_PATH_STR=./log/pickle_cache
CACHE_WITH_PICKLE=False
ENABLE_CACHE=False
PROMPT_CACHE_PATH=./log/prompt_cache.db
DS_CODER_COSTEER_ENV_TYPE=conda
# DS_PROPOSAL_VERSION=v2 deprecated
DS_CODER_ON_WHOLE_PIPELINE=True
COSTEER_V2_QUERY_FORMER_TRACE_LIMIT=3
# export PYTHONPATH=. # this is for running researcher branch;
================================================
FILE: .github/FUNDING.yml
================================================
github:
- MIIC-finance
================================================
FILE: .github/ISSUE_TEMPLATE/bug-report.md
================================================
---
name: "\U0001F41B Bug Report"
about: Submit a bug report to help us improve RD-Agent
labels: bug
---
## 🐛 Bug Description
<!-- A clear and concise description of what the bug is. -->
## To Reproduce
Steps to reproduce the behavior:
1.
2.
3.
## Expected Behavior
<!-- A clear and concise description of what you expected to happen. -->
## Screenshot
<!-- A screenshot of the error message or of anything that shouldn't appear. -->
## Environment
**Note**: Users can run `rdagent collect_info` to get system information and paste it directly here.
- Name of current operating system:
- Processor architecture:
- System, version, and hardware information:
- Version number of the system:
- Python version:
- Container ID:
- Container Name:
- Container Status:
- Image ID used by the container:
- Image tag used by the container:
- Container port mapping:
- Container Label:
- Startup Commands:
- RD-Agent version:
- Package version:
## Additional Notes
<!-- Add any other information about the problem here. -->
================================================
FILE: .github/ISSUE_TEMPLATE/documentation.md
================================================
---
name: "\U0001F4D6 Documentation"
about: Report an issue related to documentation
---
## 📖 Documentation
<!-- Please specify whether it's tutorial part or API reference part, and describe it.-->
================================================
FILE: .github/ISSUE_TEMPLATE/feature-request.md
================================================
---
name: "\U0001F31FFeature Request"
about: Request for a new RD-Agent feature
labels: enhancement
---
## 🌟 Feature Description
<!-- A clear and concise description of the feature proposal -->
## Motivation
1. Application scenario
2. Related works (Papers, Github repos etc.):
3. Any other relevant and important information:
<!-- Please describe why the feature is important. -->
## Alternatives
<!-- A short description of any alternative solutions or features you've considered. -->
## Additional Notes
<!-- Add any other context or screenshots about the feature request here. -->
================================================
FILE: .github/ISSUE_TEMPLATE/question.md
================================================
---
name: "❓Questions & Help"
about: Have some questions? We can offer help.
labels: question
---
## ❓ Questions and Help
We sincerely suggest that you carefully read the [documentation](http://rdagent.readthedocs.io/) first. If you still have questions after that, please describe them clearly in this issue.
================================================
FILE: .github/PULL_REQUEST_TEMPLATE.md
================================================
<!--- Thank you for submitting a Pull Request! To make our work smoother, -->
<!--- please make sure your Pull Request meets the following requirements: -->
<!--- 1. Provide a general summary of your changes in the Title above; -->
<!--- 2. Add appropriate prefixes to titles, such as `build:`, `chore:`, `ci:`, `docs:`, `feat:`, `fix:`, `perf:`, `refactor:`, `revert:`, `style:`, `test:`(Ref: https://www.conventionalcommits.org/). -->
<!--- Category: -->
<!--- Patch Updates: `fix:` -->
<!--- Example: fix(auth): correct login validation issue -->
<!--- Minor update (introduces new functionality): `feat:` -->
<!--- Example: feat(parser): add ability to parse arrays -->
<!--- Major update (breaking change): include `BREAKING CHANGE:` in the commit message footer, or append `!` after the type/scope in the commit header to indicate a breaking change. -->
<!--- Example: feat(auth)!: remove support for old authentication method -->
<!--- Other updates: `build:`, `chore:`, `ci:`, `docs:`, `perf:`, `refactor:`, `revert:`, `style:`, `test:`. -->
## Description
<!--- Describe your changes in detail -->
## Motivation and Context
<!--- Are there any related issues? If so, please put the link here. -->
<!--- Why is this change required? What problem does it solve? -->
## How Has This Been Tested?
<!--- Put an `x` in all the boxes that apply: --->
- [ ] If you are adding a new feature, test on your own test scripts.
<!--- **ATTENTION**: If you are adding a new feature, please make sure your code is **correctly tested**. If our test scripts do not cover your cases, please provide your own test scripts under the `tests` folder and test them. More information about test scripts can be found [here](https://docs.python.org/3/library/unittest.html#basic-example), or you could refer to those we provide under the `tests` folder. -->
## Screenshots of Test Results (if appropriate):
1. Your own tests:
## Types of changes
<!--- What types of changes does your code introduce? Put an `x` in all the boxes that apply: -->
- [ ] Fix bugs
- [ ] Add new feature
- [ ] Update documentation
================================================
FILE: .github/dependabot.yml
================================================
updates:
- commit-message:
prefix: build(actions)
directory: /
package-ecosystem: github-actions
schedule:
interval: weekly
- commit-message:
prefix: build(requirements)
directory: /
groups:
dev:
dependency-type: development
prod:
dependency-type: production
package-ecosystem: pip
schedule:
interval: weekly
version: 2
================================================
FILE: .github/workflows/ci.yml
================================================
concurrency:
cancel-in-progress: true
group: ${{ github.workflow }}-${{ github.ref }}
jobs:
ci:
if: ${{ !cancelled() && ! failure() }}
needs: dependabot
runs-on: ubuntu-latest
steps:
- name: checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
submodules: recursive
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
cache: pip
python-version: ${{ matrix.python-version }}
- run: make dev
- name: lint test docs and build
run: make lint docs-gen test-offline # test docs build
strategy:
matrix:
python-version:
- '3.10'
- '3.11'
dependabot:
if: ${{ github.actor == 'dependabot[bot]' && startsWith(github.head_ref, 'dependabot/pip/') }}
permissions:
contents: write
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ github.head_ref }}
- name: Set up Git
run: |
git config --global user.name github-actions
git config --global user.email github-actions@github.com
- name: Set up Python with multiple versions.
uses: actions/setup-python@v5
with:
cache: pip
python-version: |
3.10
3.11
- name: Install pipenv using pipx
run: pipx install pipenv
- name: Generate constraints for all supported Python versions
run: |
CI= PYTHON_VERSION=3.10 make constraints
CI= PYTHON_VERSION=3.11 make constraints
- name: Push changes if applicable
run: |
if [[ -n `git status --porcelain` ]]; then
git commit -a -m "build: Update constraints for dependabot."
git push
fi
name: CI
on:
pull_request:
types:
- opened
- synchronize
push:
branches:
- main
================================================
FILE: .github/workflows/pr.yml
================================================
name: Lint pull request title
on:
pull_request:
types:
- opened
- synchronize
- reopened
- edited
concurrency:
cancel-in-progress: true
group: ${{ github.workflow }}-${{ github.ref }}
jobs:
lint-title:
runs-on: ubuntu-latest
steps:
# This step is necessary because the lint title uses the .commitlintrc.js file in the project root directory.
- name: Checkout Repository
uses: actions/checkout@v4
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version: '16'
- name: Install commitlint
run: npm install --save-dev @commitlint/{config-conventional,cli}
- name: Validate PR Title with commitlint
env:
BODY: ${{ github.event.pull_request.title }}
run: |
echo "$BODY" | npx commitlint --config .commitlintrc.js
================================================
FILE: .github/workflows/readthedocs-preview.yml
================================================
concurrency:
cancel-in-progress: true
group: ${{ github.workflow }}-${{ github.ref }}
jobs:
documentation-links:
runs-on: ubuntu-latest
steps:
- uses: readthedocs/actions/preview@v1
with:
project-slug: RDAgent
name: Read the Docs Pull Request Preview
on:
pull_request_target:
types:
- opened
permissions:
pull-requests: write
================================================
FILE: .github/workflows/release.yml
================================================
name: Release
on:
push:
branches:
- main
permissions:
contents: read
jobs:
release_and_publish:
permissions:
contents: write
pull-requests: read
runs-on: ubuntu-latest
steps:
- name: Release please
id: release_please
uses: googleapis/release-please-action@v4
with:
# The current PAT (personal access token) was created on 2024-08-05.
# Since the maximum validity of a PAT is 1 year, it must be replaced before 2025-08-05.
token: ${{ secrets.PAT }}
release-type: simple
- uses: actions/checkout@v4
if: ${{ steps.release_please.outputs.release_created }}
with:
fetch-depth: 0
- name: Set up Python
if: ${{ steps.release_please.outputs.release_created }}
uses: actions/setup-python@v5
with:
cache: pip
python-version: '3.10'
- name: Install dependencies
if: ${{ steps.release_please.outputs.release_created }}
run: |
python -m pip install --upgrade pip
pip install setuptools wheel twine # better-exceptions(optional for debug)
- run: make dev
if: ${{ steps.release_please.outputs.release_created }}
- run: make build
if: ${{ steps.release_please.outputs.release_created }}
- name: upload
if: ${{ steps.release_please.outputs.release_created }}
env:
TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
run: |
make upload
================================================
FILE: .gitignore
================================================
# Custom
*.swp
.DS_Store
Pipfile
public
release-notes.md
typescript*
tmp/
.ai/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
/log*/
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env*
*.env
.venv
^env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# all pkl files
*.pkl
# all h5 files
*.h5
# all vs-code files
.vscode/
# reports
reports/
# git_ignore_folder
git_ignore_folder/
#cache
*cache*/
*cache.json
# DB files
*.db
# Docker
factor_template/mlruns/
env_tpl
mlruns/
# possible output from coder or runner
*.pth
*qlib_res.csv
# shell script
*.out
/*.sh
.aider*
rdagent/app/benchmark/factor/example.json
# UI Server resources
videos/
static/
# AI assistant
.cursor/
.claude/
AGENTS.md
!rdagent/**/AGENTS.md
scripts/
================================================
FILE: .readthedocs.yaml
================================================
# .readthedocs.yml
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
# Required
version: 2
# Set the version of Python and other tools you might need
build:
os: ubuntu-22.04
tools:
python: "3.10"
# The build needs the git tags, but the default Read the Docs checkout is a shallow clone, which causes an error.
# So we added the `git fetch --tags --unshallow || true` command to fetch the full tag history.
# Specifying `commands` overrides the default build commands, so we copied them here to make sure the build still succeeds.
commands:
- python -mvirtualenv $READTHEDOCS_VIRTUALENV_PATH
- python -m pip install --upgrade --no-cache-dir pip setuptools
- python -m pip install --upgrade --no-cache-dir sphinx
- python -m pip install --exists-action=w --no-cache-dir -r requirements/docs.txt
- python -m pip install --upgrade --upgrade-strategy only-if-needed --no-cache-dir .
- git fetch --tags --unshallow || true
- mkdir -p $READTHEDOCS_OUTPUT/html/
- python -m sphinx -T -b html -d _build/doctrees -D language=en ./docs $READTHEDOCS_OUTPUT/html
# Build documentation in the docs/ directory with Sphinx
sphinx:
configuration: docs/conf.py
# Build all formats
formats: all
# Optionally set the version of Python and requirements required to build your docs
python:
install:
- requirements: requirements/docs.txt
- method: pip
path: .
================================================
FILE: .streamlit/config.toml
================================================
[client]
showSidebarNavigation = false
================================================
FILE: CHANGELOG.md
================================================
# Changelog
## [0.8.0](https://github.com/microsoft/RD-Agent/compare/v0.7.0...v0.8.0) (2025-11-03)
### Features
* add a rag mcp in proposal ([#1267](https://github.com/microsoft/RD-Agent/issues/1267)) ([a0cd102](https://github.com/microsoft/RD-Agent/commit/a0cd1025c141aee6d4e6cb10286c77d827b89379))
* add coder check and give more time ([#1127](https://github.com/microsoft/RD-Agent/issues/1127)) ([e32d229](https://github.com/microsoft/RD-Agent/commit/e32d229f2b722acac53f4e2f7d8a98e29cb19dc1))
* add enable_cache toggle for UI data caching ([#1075](https://github.com/microsoft/RD-Agent/issues/1075)) ([0c9f193](https://github.com/microsoft/RD-Agent/commit/0c9f1930e8d5df1c00bfb32ee578da2dc53db1ec))
* add extra_eval config and import_class for custom evaluators ([#1097](https://github.com/microsoft/RD-Agent/issues/1097)) ([5accec3](https://github.com/microsoft/RD-Agent/commit/5accec37c8828ac42005c2d12b815bef599b547e))
* add hypo_critic and hypo_rewrite in proposal ([#1106](https://github.com/microsoft/RD-Agent/issues/1106)) ([71440f6](https://github.com/microsoft/RD-Agent/commit/71440f643fc9d952dfa064359c1945b729dbfd9f))
* add improve_mode to MultiProcessEvolvingStrategy for selective task implementation ([#1273](https://github.com/microsoft/RD-Agent/issues/1273)) ([9344635](https://github.com/microsoft/RD-Agent/commit/93446356952803d8b1f1eb0c39da825c19274cb6))
* add loop ID mapping to trace nodes and update UI labels ([#1098](https://github.com/microsoft/RD-Agent/issues/1098)) ([5437851](https://github.com/microsoft/RD-Agent/commit/54378518dadd6c38496eceda8ef5b33b375a5c97))
* add mask inference in debug mode ([#1154](https://github.com/microsoft/RD-Agent/issues/1154)) ([ef749ab](https://github.com/microsoft/RD-Agent/commit/ef749ab744fb6fbafd1a8e6a3642cce20ce96069))
* add only success filter toggle for traces ([#1047](https://github.com/microsoft/RD-Agent/issues/1047)) ([5e582cc](https://github.com/microsoft/RD-Agent/commit/5e582cc71d5c153666c465cb2d797dc71e43c501))
* add option to enable hyperparameter tuning only in first eval loop ([#1211](https://github.com/microsoft/RD-Agent/issues/1211)) ([bc3fa17](https://github.com/microsoft/RD-Agent/commit/bc3fa170b029f50c8f7b1828cdf4ffd024e64b8b))
* add previous runner loops to runner history ([#1142](https://github.com/microsoft/RD-Agent/issues/1142)) ([8de9f75](https://github.com/microsoft/RD-Agent/commit/8de9f757ea134b04cde0622c6225678d85a87862))
* add reasoning attribute to DSRunnerFeedback for enhanced evaluation context ([#1162](https://github.com/microsoft/RD-Agent/issues/1162)) ([4e41c97](https://github.com/microsoft/RD-Agent/commit/4e41c9797cbafd35cc0d883fede4226398c573e1))
* add sample submission file check ([#1053](https://github.com/microsoft/RD-Agent/issues/1053)) ([6a840d8](https://github.com/microsoft/RD-Agent/commit/6a840d819251e64d98daa40289592a05ac5fb369))
* add show_hard_limit option and update time limit handling in DataScience settings ([#1144](https://github.com/microsoft/RD-Agent/issues/1144)) ([fe762cd](https://github.com/microsoft/RD-Agent/commit/fe762cd860a109b426e3d89a6fbc3c161d77b5e2))
* add stdout into workspace for easier debugging ([#1236](https://github.com/microsoft/RD-Agent/issues/1236)) ([d3d4967](https://github.com/microsoft/RD-Agent/commit/d3d4967a129ad986d5087add4b101d913e1e14ba))
* add time ratio limit for hyperparameter tuning in Kaggle settin… ([#1135](https://github.com/microsoft/RD-Agent/issues/1135)) ([e44bc83](https://github.com/microsoft/RD-Agent/commit/e44bc8356a93b63eb120e88336eaf4c5b05ccd97))
* add user interaction in data science scenario ([#1251](https://github.com/microsoft/RD-Agent/issues/1251)) ([2afef70](https://github.com/microsoft/RD-Agent/commit/2afef703ca0e670197d02aab7f9c4f6e3e409872))
* add ws CLI and support optional timeout/cache ([#1066](https://github.com/microsoft/RD-Agent/issues/1066)) ([fae3def](https://github.com/microsoft/RD-Agent/commit/fae3defefa38e91131d4e351d68f4484ca280956))
* analyze feedback based on sota numbers ([#1116](https://github.com/microsoft/RD-Agent/issues/1116)) ([167f5e2](https://github.com/microsoft/RD-Agent/commit/167f5e2fe9a5679d5beca2f7d3093ac0fd17e664))
* create Jupyter notebook pipeline file based on main.py file ([#1134](https://github.com/microsoft/RD-Agent/issues/1134)) ([2fa1790](https://github.com/microsoft/RD-Agent/commit/2fa1790cb3852d96a197fd7970af4063339dfa26))
* enable drafting with knowledge ([#998](https://github.com/microsoft/RD-Agent/issues/998)) ([8e385eb](https://github.com/microsoft/RD-Agent/commit/8e385ebf422256d08f02c055ab64115872b69d94))
* enable finetune llm ([#1055](https://github.com/microsoft/RD-Agent/issues/1055)) ([909c7d6](https://github.com/microsoft/RD-Agent/commit/909c7d6e8a35ce8c43d29201eccfe5cd2a21049d))
* enable LLM‑based hypothesis selection with time‑aware prompt & colored logging ([#1122](https://github.com/microsoft/RD-Agent/issues/1122)) ([1c4ab89](https://github.com/microsoft/RD-Agent/commit/1c4ab89f52fbdff7cab68ee1b778703b20514a9b))
* enable meta planner ([#1103](https://github.com/microsoft/RD-Agent/issues/1103)) ([c208209](https://github.com/microsoft/RD-Agent/commit/c20820929b7fcdd5c9fbb81e63bad0ba76239c50))
* enable to inject diversity cross async multi-trace ([#1173](https://github.com/microsoft/RD-Agent/issues/1173)) ([bcdd957](https://github.com/microsoft/RD-Agent/commit/bcdd957c71b59d8664ecb1523b5fcf2179aa1138))
* enhance timeout handling in CoSTEER and DataScience scenarios ([#1150](https://github.com/microsoft/RD-Agent/issues/1150)) ([06233cb](https://github.com/microsoft/RD-Agent/commit/06233cb95acb1df01ca71b1a554cf4a5f2c4d092))
* enhance timeout management and knowledge base handling in CoSTEER components ([#1130](https://github.com/microsoft/RD-Agent/issues/1130)) ([963d260](https://github.com/microsoft/RD-Agent/commit/963d26001e346c05bcc540536f65d9a199ca6ac5))
* fallback to acceptable results ([#1129](https://github.com/microsoft/RD-Agent/issues/1129)) ([3ce2bd4](https://github.com/microsoft/RD-Agent/commit/3ce2bd41c442c6b756810c7895b1e6a1df13dfbb))
* improve fallback handling in CoSTEER and add GPU usage guidelin… ([#1165](https://github.com/microsoft/RD-Agent/issues/1165)) ([cec4240](https://github.com/microsoft/RD-Agent/commit/cec424046759f02735a6b49e3a9f615a403b62c9))
* init pydantic ai agent & context 7 mcp ([#1240](https://github.com/microsoft/RD-Agent/issues/1240)) ([59af538](https://github.com/microsoft/RD-Agent/commit/59af5383d7d1d73a5e3630da9d1bbfed31111436))
* **mcp:** cache with one-click toggle ([#1269](https://github.com/microsoft/RD-Agent/issues/1269)) ([6f86863](https://github.com/microsoft/RD-Agent/commit/6f86863b63ae331f9b7761eaf9ae0a85aca7ba42))
* mcts policy based on trace scheduler ([#1203](https://github.com/microsoft/RD-Agent/issues/1203)) ([13890e0](https://github.com/microsoft/RD-Agent/commit/13890e0bbcaf5a7a87a7bff55e720b0c3bbbbfe9))
* new prompt for auto-sota-selector ([#1109](https://github.com/microsoft/RD-Agent/issues/1109)) ([13c92a9](https://github.com/microsoft/RD-Agent/commit/13c92a90eee275e40a9a2fb0b853c8ecb2bd59fd))
* offline selector ([#1231](https://github.com/microsoft/RD-Agent/issues/1231)) ([76b2e87](https://github.com/microsoft/RD-Agent/commit/76b2e87348cbeb983606691fdf343c4fc721c2bb))
* prob-based trace scheduler ([#1131](https://github.com/microsoft/RD-Agent/issues/1131)) ([970561a](https://github.com/microsoft/RD-Agent/commit/970561a057ed5e56e29be3577b7c062aca4b49b6))
* query & cache package_info ([#1083](https://github.com/microsoft/RD-Agent/issues/1083)) ([19869ea](https://github.com/microsoft/RD-Agent/commit/19869ea4752b67b62ffdcb8d54632a59661b5466))
* refactor CoSTEER classes to use DSCoSTEER and update max seconds handling ([#1156](https://github.com/microsoft/RD-Agent/issues/1156)) ([6d01e3e](https://github.com/microsoft/RD-Agent/commit/6d01e3e1ca1eec281b52f461724bf63adefe5d81))
* refine the logic of enabling hyperparameter tuning and add criteria ([#1175](https://github.com/microsoft/RD-Agent/issues/1175)) ([af071f5](https://github.com/microsoft/RD-Agent/commit/af071f5f45bfeb524a0f16da84d802e523478213))
* show the summarized final difference between the final workspace and the base workspace ([#1281](https://github.com/microsoft/RD-Agent/issues/1281)) ([2bf8345](https://github.com/microsoft/RD-Agent/commit/2bf83453921457e44c802913a8e24b0de98611bd))
* streamline hyperparameter tuning checks and update evaluation g… ([#1167](https://github.com/microsoft/RD-Agent/issues/1167)) ([383e5ed](https://github.com/microsoft/RD-Agent/commit/383e5ed488c73abedb41acb2ea27afd60738669f))
* ui, support disable cache ([#1217](https://github.com/microsoft/RD-Agent/issues/1217)) ([92efe33](https://github.com/microsoft/RD-Agent/commit/92efe33fa9c8be54a71bf0840f867edc877236fe))
* update README with latest paper acceptance to NeurIPS 2025 ([#1252](https://github.com/microsoft/RD-Agent/issues/1252)) ([8332960](https://github.com/microsoft/RD-Agent/commit/833296084f3b3d0fea15fd693e302c26b2d80762))
### Bug Fixes
* add a switch for ensemble_time_upper_bound and fix some bug in main ([#1226](https://github.com/microsoft/RD-Agent/issues/1226)) ([f00a538](https://github.com/microsoft/RD-Agent/commit/f00a5382b16379aaea2dfabf09a681be25e29d3e))
* add gpu_info in research phase ([#1094](https://github.com/microsoft/RD-Agent/issues/1094)) ([58c9c1b](https://github.com/microsoft/RD-Agent/commit/58c9c1b9b62d6d25b9b6980e19959664ef7272d7))
* add json format response fallback to prompt templates ([#1246](https://github.com/microsoft/RD-Agent/issues/1246)) ([4dfb8a1](https://github.com/microsoft/RD-Agent/commit/4dfb8a130b3970192d3a8da799152de492c79aec))
* add metric in scores.csv and avoid reading sample_submission.csv ([#1152](https://github.com/microsoft/RD-Agent/issues/1152)) ([fd039f1](https://github.com/microsoft/RD-Agent/commit/fd039f1f8184c9107539f270735f227cf68c62c0))
* add missing self parameter to instance methods in DSProposalV2ExpGen ([#1213](https://github.com/microsoft/RD-Agent/issues/1213)) ([68af035](https://github.com/microsoft/RD-Agent/commit/68af03517749cff4726acb016daad561148147bf))
* add spec for hyperparameters in task design and coder ([#995](https://github.com/microsoft/RD-Agent/issues/995)) ([10246fd](https://github.com/microsoft/RD-Agent/commit/10246fd2491d48560d5f7055f78906e7a6a2882e))
* align scenario descriptions and include debug timeout ([#1079](https://github.com/microsoft/RD-Agent/issues/1079)) ([13b6663](https://github.com/microsoft/RD-Agent/commit/13b66630ec17f1ed4f52a9d8ea0913722ca74483))
* allow prev_out keys to be None in workspace cleanup assertion ([#1214](https://github.com/microsoft/RD-Agent/issues/1214)) ([1f4d190](https://github.com/microsoft/RD-Agent/commit/1f4d190a3209bbe4ec960f8dd79be59672cd0e7f))
* based on response schema; not function calling ([#1038](https://github.com/microsoft/RD-Agent/issues/1038)) ([99da8c5](https://github.com/microsoft/RD-Agent/commit/99da8c58f0f779aa19edc2522d4cf143577811d8))
* cancel tasks on resume and kill subprocesses on termination ([#1166](https://github.com/microsoft/RD-Agent/issues/1166)) ([cf6e418](https://github.com/microsoft/RD-Agent/commit/cf6e418eb8d899e22c93279055d42c185397fa2a))
* change runner prompts ([#1223](https://github.com/microsoft/RD-Agent/issues/1223)) ([6d3e73d](https://github.com/microsoft/RD-Agent/commit/6d3e73d679a8ffe4a48923590a7c37b4fdcd207a))
* clear ws_ckp after extraction to reduce workspace object size ([#1137](https://github.com/microsoft/RD-Agent/issues/1137)) ([783affe](https://github.com/microsoft/RD-Agent/commit/783affe0d513b2e9fbcbb11e0408cc79db19a274))
* correct DS_LOCAL_DATA_PATH error in devcontainer ([#1063](https://github.com/microsoft/RD-Agent/issues/1063)) ([588fcfa](https://github.com/microsoft/RD-Agent/commit/588fcfa3ab0a4eca5afee766e3f56f094b28a999))
* **dockerfile:** install coreutils to resolve timeout command error ([#1260](https://github.com/microsoft/RD-Agent/issues/1260)) ([07f89b0](https://github.com/microsoft/RD-Agent/commit/07f89b013ea99102f4875fda5704adde14cf9978))
* **docs:** update rdagent ui with correct params ([#1249](https://github.com/microsoft/RD-Agent/issues/1249)) ([f360d0a](https://github.com/microsoft/RD-Agent/commit/f360d0a212793eb044c218b5e13b095e684a632d))
* enable embedding truncation ([#1188](https://github.com/microsoft/RD-Agent/issues/1188)) ([2421fa4](https://github.com/microsoft/RD-Agent/commit/2421fa4493bd86c98ff672afc26ec71ba510e391))
* enhance feedback handling in MultiProcessEvolvingStrategy for improved task evolution ([#1274](https://github.com/microsoft/RD-Agent/issues/1274)) ([961e561](https://github.com/microsoft/RD-Agent/commit/961e56102cddae3348af46a30f9085f353151890))
* error in prompt template ([#1065](https://github.com/microsoft/RD-Agent/issues/1065)) ([a90e598](https://github.com/microsoft/RD-Agent/commit/a90e598e568c0339a5f29577fbf44e302bc0d96f))
* filter log folders bug in ui ([#1073](https://github.com/microsoft/RD-Agent/issues/1073)) ([d0f33c5](https://github.com/microsoft/RD-Agent/commit/d0f33c56733bb28222c1f2c8f8a0ff5604ddf858))
* fix a bug in return curve display ([#1042](https://github.com/microsoft/RD-Agent/issues/1042)) ([249f661](https://github.com/microsoft/RD-Agent/commit/249f6614a67d8b38e9ad2f0d95154db7071e8e3a))
* fix a small bug in json_mode ([#1041](https://github.com/microsoft/RD-Agent/issues/1041)) ([8bc12ea](https://github.com/microsoft/RD-Agent/commit/8bc12eaaa7ecda69043ec781896299a6796c8140))
* fix a small bug in response_schema ([#1043](https://github.com/microsoft/RD-Agent/issues/1043)) ([66cadcd](https://github.com/microsoft/RD-Agent/commit/66cadcd7b2a91bac416acd94196b96f43b572c2b))
* fix bug for hypo_select_with_llm when not support response_schema ([#1208](https://github.com/microsoft/RD-Agent/issues/1208)) ([54cc2c4](https://github.com/microsoft/RD-Agent/commit/54cc2c492e3f6b22b3836899f2ddf83b1296f173))
* fix chat_max_tokens calculation method to show true input_max_tokens ([#1241](https://github.com/microsoft/RD-Agent/issues/1241)) ([7d749b8](https://github.com/microsoft/RD-Agent/commit/7d749b819557f1abfca58189ae2abf2aec41fef5))
* fix code diff bug ([#1115](https://github.com/microsoft/RD-Agent/issues/1115)) ([4603e88](https://github.com/microsoft/RD-Agent/commit/4603e88dbe910614f20a843f29463f17eebdda32))
* fix mcts ([#1270](https://github.com/microsoft/RD-Agent/issues/1270)) ([c73f67a](https://github.com/microsoft/RD-Agent/commit/c73f67affee035def37474c66ebdd00dbc16c4ca))
* fix some bugs in RD-Agent(Q) ([#1143](https://github.com/microsoft/RD-Agent/issues/1143)) ([44fd2ee](https://github.com/microsoft/RD-Agent/commit/44fd2ee68031599e106cbd99b8e86a110d8f2423))
* **graph:** using assignment expression to avoid repeated function call ([#1174](https://github.com/microsoft/RD-Agent/issues/1174)) ([b4f57ce](https://github.com/microsoft/RD-Agent/commit/b4f57cec87bc61e8aa408319532cec055cb2d632))
* handle mixed str and dict types in code_list ([#1279](https://github.com/microsoft/RD-Agent/issues/1279)) ([63ecb3b](https://github.com/microsoft/RD-Agent/commit/63ecb3bf26604d93f85595f6f6470c860be3c5ba))
* handle None output and conditional step dump in LoopBase execution ([#1212](https://github.com/microsoft/RD-Agent/issues/1212)) ([68b6985](https://github.com/microsoft/RD-Agent/commit/68b69851916ed5bca42aab859ba7a9938bec4eb7))
* handle the no-update case of root node in uncommited_rec_status ([#1062](https://github.com/microsoft/RD-Agent/issues/1062)) ([ead8dce](https://github.com/microsoft/RD-Agent/commit/ead8dced0e5b157b6e1bded380f440ee0b8a86f7))
* handle ValueError in stdout shrinking and refactor shrink logic ([#1228](https://github.com/microsoft/RD-Agent/issues/1228)) ([bc7a3b4](https://github.com/microsoft/RD-Agent/commit/bc7a3b43b7cef45f036d508f95231b5885ad65f7))
* ignore case when checking metric name ([#1160](https://github.com/microsoft/RD-Agent/issues/1160)) ([fc0df6e](https://github.com/microsoft/RD-Agent/commit/fc0df6e9fc7d8a9e7a0b4d4cb879cffbbcb9162f))
* ignore class types when filtering workflow steps ([#1085](https://github.com/microsoft/RD-Agent/issues/1085)) ([64e3ec8](https://github.com/microsoft/RD-Agent/commit/64e3ec8f9afb5611814f9b64d50e6dc0685df8b2))
* ignore RuntimeError for shared workspace double recovery ([#1140](https://github.com/microsoft/RD-Agent/issues/1140)) ([8fc1e9b](https://github.com/microsoft/RD-Agent/commit/8fc1e9bf8f5242e56d7bacf53cf58f9abe94e356))
* improve the logic of json_schema and refine the reasoning extraction logic for reasoning model ([#1044](https://github.com/microsoft/RD-Agent/issues/1044)) ([12060b1](https://github.com/microsoft/RD-Agent/commit/12060b197ca618ca8901f93cde6bc2b42d79e4e9))
* increase retry count in hypothesis_gen decorator to 10 ([#1230](https://github.com/microsoft/RD-Agent/issues/1230)) ([c4b8baa](https://github.com/microsoft/RD-Agent/commit/c4b8baaa5829567833ea2328fe89941423bf4cf2))
* increase time default not controlled by LLM ([#1196](https://github.com/microsoft/RD-Agent/issues/1196)) ([8c62561](https://github.com/microsoft/RD-Agent/commit/8c62561d1c6bd3c8b3d354951cd154b08d567ef2))
* insert await asyncio.sleep(0) to yield control in loop ([#1186](https://github.com/microsoft/RD-Agent/issues/1186)) ([5705be0](https://github.com/microsoft/RD-Agent/commit/5705be0512b788337c6798aea0bdf52791dd8e73))
* jinja problem of enumerate ([#1216](https://github.com/microsoft/RD-Agent/issues/1216)) ([af9068c](https://github.com/microsoft/RD-Agent/commit/af9068c0b5263c5f58a43ccd13c19808020f77aa))
* kaggle competition metric direction ([#1195](https://github.com/microsoft/RD-Agent/issues/1195)) ([a933b6c](https://github.com/microsoft/RD-Agent/commit/a933b6cabe6f6b673a30601f9b0974bc3ca806ae))
* merge candidates ([#1254](https://github.com/microsoft/RD-Agent/issues/1254)) ([5a78c89](https://github.com/microsoft/RD-Agent/commit/5a78c89cee1fb593e3503bd4266042ba1e29569a))
* minor conflict in prompts ([#1081](https://github.com/microsoft/RD-Agent/issues/1081)) ([f821e4c](https://github.com/microsoft/RD-Agent/commit/f821e4c1c56462c54d5fbe15dd797c147334b182))
* minor fix to runtime_environment ([#1089](https://github.com/microsoft/RD-Agent/issues/1089)) ([bff82ef](https://github.com/microsoft/RD-Agent/commit/bff82ef93e225c43c6b55bb642c484d5b88f3cff))
* model/factor experiment filtering in Qlib proposals ([#1257](https://github.com/microsoft/RD-Agent/issues/1257)) ([0f722e1](https://github.com/microsoft/RD-Agent/commit/0f722e1ce713d2010fe8b8181b905145a1186f95))
* move snapshot saving after step index update in loop execution ([#1206](https://github.com/microsoft/RD-Agent/issues/1206)) ([0e3a9af](https://github.com/microsoft/RD-Agent/commit/0e3a9afd0a30b5a12ef3431043405f3314b4c635))
* move task cancellation to finally block and fix subprocess kill typo ([#1234](https://github.com/microsoft/RD-Agent/issues/1234)) ([fb628e3](https://github.com/microsoft/RD-Agent/commit/fb628e3bcaded1f292e5827f258fa7d5f9ed74a9))
* package and timer bug ([#1092](https://github.com/microsoft/RD-Agent/issues/1092)) ([7faf6d9](https://github.com/microsoft/RD-Agent/commit/7faf6d9b215d678b8cb146270a3e917a62ac1d88))
* path traversal risk ([#1050](https://github.com/microsoft/RD-Agent/issues/1050)) ([2f78216](https://github.com/microsoft/RD-Agent/commit/2f782169ebeb0453422621ac8ace06353ca72615))
* prevent JSON content from being added multiple times during retries ([#1255](https://github.com/microsoft/RD-Agent/issues/1255)) ([9d46a68](https://github.com/microsoft/RD-Agent/commit/9d46a68a36f237ef99bbc4a78668d71339fa9f91))
* prevent parallelism in feedback and record steps ([#1046](https://github.com/microsoft/RD-Agent/issues/1046)) ([d0272a9](https://github.com/microsoft/RD-Agent/commit/d0272a9de104a629ccd2652b9e95c9bb58ac6cb1))
* prompt yaml ([#1112](https://github.com/microsoft/RD-Agent/issues/1112)) ([1f2c9b1](https://github.com/microsoft/RD-Agent/commit/1f2c9b17b8d5250dc2ff81ad564139746d11a7c3))
* properly assign sota_exp_fb before None comparison ([#1037](https://github.com/microsoft/RD-Agent/issues/1037)) ([5d6a927](https://github.com/microsoft/RD-Agent/commit/5d6a927501e95b6afa520294d23fcf9ca16c69ae))
* refine DSCoSTEER_eval prompts ([#1157](https://github.com/microsoft/RD-Agent/issues/1157)) ([c62e5fc](https://github.com/microsoft/RD-Agent/commit/c62e5fcc871d4f88babc5a4c9cf8e4655e8ba437))
* refine prompt, equal lightgbm, discourage over hypertuning ([#1072](https://github.com/microsoft/RD-Agent/issues/1072)) ([56ba15a](https://github.com/microsoft/RD-Agent/commit/56ba15a03fc278e7d701b40bbb5209411b27e561))
* refine prompt; runner focus on low hanging fruit ([#1076](https://github.com/microsoft/RD-Agent/issues/1076)) ([1778b8c](https://github.com/microsoft/RD-Agent/commit/1778b8c953888e9b3b91d28483e0b64d126e3eb6))
* refine prompts and add additional package info ([#1179](https://github.com/microsoft/RD-Agent/issues/1179)) ([22428a4](https://github.com/microsoft/RD-Agent/commit/22428a45053b6eefcfb805802b8bef4384a1ddda))
* refine task scheduling logic in MultiProcessEvolvingStrategy for… ([#1275](https://github.com/microsoft/RD-Agent/issues/1275)) ([417766e](https://github.com/microsoft/RD-Agent/commit/417766ee366d1fdf4a54e297a93d05cb606d5144))
* refine the prompt to force complete code & refine the logic of running ([#1069](https://github.com/microsoft/RD-Agent/issues/1069)) ([1e61de3](https://github.com/microsoft/RD-Agent/commit/1e61de3e60566029f1c89ca2c747bfbf3a354693))
* remove refine decision & bug fix ([#1031](https://github.com/microsoft/RD-Agent/issues/1031)) ([0059a6a](https://github.com/microsoft/RD-Agent/commit/0059a6aeb658a76bdc28cd7741a2bc9e6569363f))
* remove unused imports in data science scenario module ([#1136](https://github.com/microsoft/RD-Agent/issues/1136)) ([2307237](https://github.com/microsoft/RD-Agent/commit/23072377659da0bd206dc64dd858c9da75283f39))
* replace hardcoded ChromeDriver path with webdriver-manager ([#1271](https://github.com/microsoft/RD-Agent/issues/1271)) ([40876e2](https://github.com/microsoft/RD-Agent/commit/40876e2085fb0e30e46b69fec34208d7e0dd1162))
* revert 2 commits ([#1239](https://github.com/microsoft/RD-Agent/issues/1239)) ([1265ae9](https://github.com/microsoft/RD-Agent/commit/1265ae94e357190132fb2cd9ba3579d353ed6cee))
* revert to v10 setting ([#1220](https://github.com/microsoft/RD-Agent/issues/1220)) ([d868188](https://github.com/microsoft/RD-Agent/commit/d868188f9a6fd451d1daf1b1cc14017a50232b0d))
* scheduler next selection parallel disorder ([#1028](https://github.com/microsoft/RD-Agent/issues/1028)) ([f468595](https://github.com/microsoft/RD-Agent/commit/f468595169512b89f436396ee976404879e00d7a))
* set requires_documentation_search to None to disable feature in eval ([#1245](https://github.com/microsoft/RD-Agent/issues/1245)) ([e117234](https://github.com/microsoft/RD-Agent/commit/e1172343e483638dc24715402048ec7116e8a429))
* skip res_ratio check if timer or res_time is None ([#1189](https://github.com/microsoft/RD-Agent/issues/1189)) ([17400a3](https://github.com/microsoft/RD-Agent/commit/17400a3dc46ab987ef4670cf697a22c7145858be))
* split then sample & remove simple model guide in ds proposal ([#1034](https://github.com/microsoft/RD-Agent/issues/1034)) ([2dde8b8](https://github.com/microsoft/RD-Agent/commit/2dde8b84a1d08cf0ca39b2f50de64d053fd73ba8))
* stop evolve if global timer is timeout ([#1039](https://github.com/microsoft/RD-Agent/issues/1039)) ([ad37417](https://github.com/microsoft/RD-Agent/commit/ad374176a14be1fa5aac43fd8df48f89b2a81fe0))
* summary page bug ([#1219](https://github.com/microsoft/RD-Agent/issues/1219)) ([36fec9a](https://github.com/microsoft/RD-Agent/commit/36fec9afa6d740a9f1ac32ac661cf7ec9fdaefc8))
* TypeError: cannot unpack non-iterable bool object ([#1036](https://github.com/microsoft/RD-Agent/issues/1036)) ([f4370a4](https://github.com/microsoft/RD-Agent/commit/f4370a4265c84cefc4844d21b7f296929ca7638c))
* ui bug ([#1192](https://github.com/microsoft/RD-Agent/issues/1192)) ([ad901aa](https://github.com/microsoft/RD-Agent/commit/ad901aaf4f7b344b8171b98ea753fde67b058a9b))
* update fallback criterion ([#1210](https://github.com/microsoft/RD-Agent/issues/1210)) ([05fca1a](https://github.com/microsoft/RD-Agent/commit/05fca1acced3d3cfddbab3871d3dcee597b675bd))
* update requirements.txt's streamlit ([#1133](https://github.com/microsoft/RD-Agent/issues/1133)) ([512d08f](https://github.com/microsoft/RD-Agent/commit/512d08f56c210edfa2ff45c71e53724909f10d8f))
* use CoSTEERSettings for DSRunnerCoSTEERSettings ([#1096](https://github.com/microsoft/RD-Agent/issues/1096)) ([152a70f](https://github.com/microsoft/RD-Agent/commit/152a70f25a090e175e7b55c2285ca710954be9cc))
## [0.7.0](https://github.com/microsoft/RD-Agent/compare/v0.6.1...v0.7.0) (2025-07-08)
### Features
* add code change summary ([#1000](https://github.com/microsoft/RD-Agent/issues/1000)) ([937ec26](https://github.com/microsoft/RD-Agent/commit/937ec263b215928633822c4d76ad4e47442c8198))
* add hide_base_name option and update data folder prompts ([#1004](https://github.com/microsoft/RD-Agent/issues/1004)) ([2f61fa8](https://github.com/microsoft/RD-Agent/commit/2f61fa8cd90c91ad29f320ce9ea6c49f49ac9111))
* added running time statistics for the DS scenario experiment ([#1007](https://github.com/microsoft/RD-Agent/issues/1007)) ([030abd8](https://github.com/microsoft/RD-Agent/commit/030abd87191377641a678c80852f5ecad84e7a6e))
* merge code summary and support more traces ([#1025](https://github.com/microsoft/RD-Agent/issues/1025)) ([48201e7](https://github.com/microsoft/RD-Agent/commit/48201e79b55ff5a98dad51702a7d0ac6b1ddc9eb))
* show first evo round codes diff ([#1009](https://github.com/microsoft/RD-Agent/issues/1009)) ([4844622](https://github.com/microsoft/RD-Agent/commit/4844622e5fd28d7cbaabd9d7888f8204c60b76b3))
* try coder on whole data ([#1017](https://github.com/microsoft/RD-Agent/issues/1017)) ([4973e05](https://github.com/microsoft/RD-Agent/commit/4973e0532248c6172eec3bb70dffda052af2d14f))
### Bug Fixes
* fix a minor bug in DS eval ([#1012](https://github.com/microsoft/RD-Agent/issues/1012)) ([5a520e9](https://github.com/microsoft/RD-Agent/commit/5a520e9d44899d44fddc0f2e5571596223161b71))
* fix some bugs in quant scen ([#1026](https://github.com/microsoft/RD-Agent/issues/1026)) ([7b34d41](https://github.com/microsoft/RD-Agent/commit/7b34d418642d1c0c2986db9ecf6a5d9bc22cc3da))
* add experimental support for DeepSeek models and update docs about configuration ([#1024](https://github.com/microsoft/RD-Agent/issues/1024)) ([35cfc19](https://github.com/microsoft/RD-Agent/commit/35cfc193f9b35d786aeb7585334427ad358c982f))
## [0.6.1](https://github.com/microsoft/RD-Agent/compare/v0.6.0...v0.6.1) (2025-06-28)
### Bug Fixes
* fix mount ([#1001](https://github.com/microsoft/RD-Agent/issues/1001)) ([4ae2f13](https://github.com/microsoft/RD-Agent/commit/4ae2f1303dfcbaea53d459be7c8e85bf85ce5f4f))
* handle the bug of wrong dag_parent index ([#996](https://github.com/microsoft/RD-Agent/issues/996)) ([bda12ff](https://github.com/microsoft/RD-Agent/commit/bda12ffecf9ae116e0d04eece0c6a1b61413d916))
* improve log folder sorting and selection UX ([#993](https://github.com/microsoft/RD-Agent/issues/993)) ([b116807](https://github.com/microsoft/RD-Agent/commit/b11680777f116b6c40f9e535e0da10c186c95050))
## [0.6.0](https://github.com/microsoft/RD-Agent/compare/v0.5.0...v0.6.0) (2025-06-26)
### Features
* async mechanism for multi-trace ([#981](https://github.com/microsoft/RD-Agent/issues/981)) ([9e60c32](https://github.com/microsoft/RD-Agent/commit/9e60c32cf348481eb55617809c059c359d7603b8))
### Bug Fixes
* add async to direct_exp_gen to avoid infinite loop ([#992](https://github.com/microsoft/RD-Agent/issues/992)) ([78c203d](https://github.com/microsoft/RD-Agent/commit/78c203d8eefbba67fc120b35cb25e85b2200ac49))
* docker container cleanup to prevent accumulation and system slowdown ([#975](https://github.com/microsoft/RD-Agent/issues/975)) ([05cf094](https://github.com/microsoft/RD-Agent/commit/05cf094913e48c903c8a4476d6c609d8bfa10681))
* fix a bug and update the docs ([#978](https://github.com/microsoft/RD-Agent/issues/978)) ([d1ae9e1](https://github.com/microsoft/RD-Agent/commit/d1ae9e1dcc2ccd1ffe05cb1c6db3e905fa70425c))
* merge datascience v3 and v2 ([#974](https://github.com/microsoft/RD-Agent/issues/974)) ([1ba7548](https://github.com/microsoft/RD-Agent/commit/1ba754853ce2010ce1cb0bbd217b67689fa1ebdf))
* refine details ([#979](https://github.com/microsoft/RD-Agent/issues/979)) ([25caa3d](https://github.com/microsoft/RD-Agent/commit/25caa3d00c255286dce27915b9355987b87ed2e8))
* refine prompt ([#987](https://github.com/microsoft/RD-Agent/issues/987)) ([76df96e](https://github.com/microsoft/RD-Agent/commit/76df96ee88212a8aee7f518b9cacf80591dc2939))
## [0.5.0](https://github.com/microsoft/RD-Agent/compare/v0.4.0...v0.5.0) (2025-06-18)
### Features
* add a check for whether values in score_df are NaN ([#756](https://github.com/microsoft/RD-Agent/issues/756)) ([d9cc780](https://github.com/microsoft/RD-Agent/commit/d9cc78098beb27f3a1bf2f2d461302db177b7d41))
* add competition level filter and extract constants to utils ([#869](https://github.com/microsoft/RD-Agent/issues/869)) ([b40b605](https://github.com/microsoft/RD-Agent/commit/b40b6055368e6c72d8435352104b1c281b06da7f))
* add DocDev for auto-generating workspace documentation ([#781](https://github.com/microsoft/RD-Agent/issues/781)) ([bcba6ea](https://github.com/microsoft/RD-Agent/commit/bcba6eac32684ebb267c93b4e85dbfa9561d15d1))
* add drafting pipeline ([#832](https://github.com/microsoft/RD-Agent/issues/832)) ([efedddf](https://github.com/microsoft/RD-Agent/commit/efedddf39bc19221fdffc2e39ee0a09097fc82b0))
* add last_exp_fb to DSTrace and update feedback retrieval usage ([#910](https://github.com/microsoft/RD-Agent/issues/910)) ([10531fd](https://github.com/microsoft/RD-Agent/commit/10531fda9438c6915b26d5013bd2413e1333ceb9))
* add mlflow logger in RD loop to log ([#815](https://github.com/microsoft/RD-Agent/issues/815)) ([b91b54f](https://github.com/microsoft/RD-Agent/commit/b91b54f355c26b751087d0c14774f466e82866de))
* add naive experiment generator and update proposal configurations ([#759](https://github.com/microsoft/RD-Agent/issues/759)) ([75494f4](https://github.com/microsoft/RD-Agent/commit/75494f4fed5bc845acfd7f7bacef385f0f96c514))
* add RD-Agent-Quant scenario ([#838](https://github.com/microsoft/RD-Agent/issues/838)) ([6e42d52](https://github.com/microsoft/RD-Agent/commit/6e42d523a85df67aa13927abbf0894564c71880e))
* add reasoning_effort parameter to LiteLLMAPIBackend and LLMSett… ([#754](https://github.com/microsoft/RD-Agent/issues/754)) ([113889f](https://github.com/microsoft/RD-Agent/commit/113889fefe9b09aaea1b564704c81664b8f77ec5))
* add reviewer in feedback ([#765](https://github.com/microsoft/RD-Agent/issues/765)) ([1a95bee](https://github.com/microsoft/RD-Agent/commit/1a95bee6aa6bc6f45fdeb484f3a6f81caa273038))
* advanced checkpoint selectors ([#790](https://github.com/microsoft/RD-Agent/issues/790)) ([50ea033](https://github.com/microsoft/RD-Agent/commit/50ea0336e93d8cb39fb871e81a3f61abdf293bc7))
* archive python and csv files in workspace to maintain results ([#814](https://github.com/microsoft/RD-Agent/issues/814)) ([67d0e01](https://github.com/microsoft/RD-Agent/commit/67d0e01e7c9237da1371d93cbf9d86f5f46faac4))
* checkpoint selection ([#744](https://github.com/microsoft/RD-Agent/issues/744)) ([a15a06a](https://github.com/microsoft/RD-Agent/commit/a15a06ad643977db59d7cac9da52e637cf80395a))
* custom data ([#810](https://github.com/microsoft/RD-Agent/issues/810)) ([6322916](https://github.com/microsoft/RD-Agent/commit/632291608cf605bd8bcfcab0017824823bdecdb8))
* dump model ([#776](https://github.com/microsoft/RD-Agent/issues/776)) ([b49481e](https://github.com/microsoft/RD-Agent/commit/b49481e073e6f536d2b1b3bd2d01229ed05abdea))
* enable to set different version of idea-proposal for multi traces ([#895](https://github.com/microsoft/RD-Agent/issues/895)) ([236c28f](https://github.com/microsoft/RD-Agent/commit/236c28f29c6bc5da62129632e464bbc32056ebdb))
* enhance compatibility with more LLM models ([#905](https://github.com/microsoft/RD-Agent/issues/905)) ([8800624](https://github.com/microsoft/RD-Agent/commit/8800624ad4749d6e798785a082c9f94c306792ef))
* idea pool integrated to exp_gen & add timer to RD-Agent & pause-resume to RD-loops ([#795](https://github.com/microsoft/RD-Agent/issues/795)) ([e62aefa](https://github.com/microsoft/RD-Agent/commit/e62aefa56e34ff45a8ed033f7bf28b95c8e63656))
* joblib cache ([#749](https://github.com/microsoft/RD-Agent/issues/749)) ([83a0411](https://github.com/microsoft/RD-Agent/commit/83a041148ff908871b1906f9e6889d80ab513412))
* log api status to mlflow ([#860](https://github.com/microsoft/RD-Agent/issues/860)) ([049921b](https://github.com/microsoft/RD-Agent/commit/049921beb0b4ed0ba1ab7508d9857d2c1e729349))
* log reaching max time limit before breaking CoSTEER evolution ([#921](https://github.com/microsoft/RD-Agent/issues/921)) ([837fff2](https://github.com/microsoft/RD-Agent/commit/837fff29096fefe1369d386ef8a860395b737173))
* merge failed and successful traces together ([#766](https://github.com/microsoft/RD-Agent/issues/766)) ([3a2aa8c](https://github.com/microsoft/RD-Agent/commit/3a2aa8cf0102647950b2dfc0007c118b0c799cd4))
* merge selectively ([#888](https://github.com/microsoft/RD-Agent/issues/888)) ([06ba314](https://github.com/microsoft/RD-Agent/commit/06ba314ff0f91e7e78e8d456c719ac3194a8c774))
* multi-trace online merge ([#886](https://github.com/microsoft/RD-Agent/issues/886)) ([2112d67](https://github.com/microsoft/RD-Agent/commit/2112d676d0938de6fea163b2e5eb9c36771e7041))
* new proposal (structured outputs) prompts ([#887](https://github.com/microsoft/RD-Agent/issues/887)) ([150796a](https://github.com/microsoft/RD-Agent/commit/150796aaa72eaa5037fd7db8e785058fbc4d4967))
* parallel loop running based on asyncio ([#932](https://github.com/microsoft/RD-Agent/issues/932)) ([c63e207](https://github.com/microsoft/RD-Agent/commit/c63e2071f3179feef69f88061c0172cb5c3157f2))
* propose hypothesis across multiple parts in pipeline ([#827](https://github.com/microsoft/RD-Agent/issues/827)) ([acb0e21](https://github.com/microsoft/RD-Agent/commit/acb0e21a331410d044849e12e2887f41e5ff1c3a))
* pull image with progress ([#777](https://github.com/microsoft/RD-Agent/issues/777)) ([5cad086](https://github.com/microsoft/RD-Agent/commit/5cad0860204ede974533dc7bdc9808cfd135fa24))
* raise error when timeout in api call ([#793](https://github.com/microsoft/RD-Agent/issues/793)) ([eafd4df](https://github.com/microsoft/RD-Agent/commit/eafd4dfc6263f19a8cdaf27498a1d07b43815306))
* raise policy violation ([#894](https://github.com/microsoft/RD-Agent/issues/894)) ([5b9d007](https://github.com/microsoft/RD-Agent/commit/5b9d0072aebe15369e9a0010af83e71684baeae7))
* reanalyze competition info & pipeline coding evaluator prompt ([#837](https://github.com/microsoft/RD-Agent/issues/837)) ([f7b5258](https://github.com/microsoft/RD-Agent/commit/f7b52580080c75d311355bcc6193b49495801809))
* refine merge ([#842](https://github.com/microsoft/RD-Agent/issues/842)) ([99463b4](https://github.com/microsoft/RD-Agent/commit/99463b46819b3a0dcb2bb12a823a9cdf7ec560b4))
* refine prompt ([#760](https://github.com/microsoft/RD-Agent/issues/760)) ([a91b182](https://github.com/microsoft/RD-Agent/commit/a91b182c4c9510eb34e4aab956588e909fa5d70b))
* replace hard-coded cache paths with dynamic cache_path config ([#952](https://github.com/microsoft/RD-Agent/issues/952)) ([db56894](https://github.com/microsoft/RD-Agent/commit/db568947f1084a80d603718f5a13fdbd72b90a47))
* revert draft stage into a soft decay in hypothesis selection ([#849](https://github.com/microsoft/RD-Agent/issues/849)) ([d41db0c](https://github.com/microsoft/RD-Agent/commit/d41db0ca357b07091825ebd9d18c303b6db3cc6a))
* trace merging ([#836](https://github.com/microsoft/RD-Agent/issues/836)) ([a3d5473](https://github.com/microsoft/RD-Agent/commit/a3d547369e408a05cff570c1239b6320be40418d))
* truncate by time ([#863](https://github.com/microsoft/RD-Agent/issues/863)) ([2b9427a](https://github.com/microsoft/RD-Agent/commit/2b9427ae036ffe1e28a717502f45500fe91fe5ac))
* update prompt to improve JSON response format of some LLM models ([#928](https://github.com/microsoft/RD-Agent/issues/928)) ([0b84709](https://github.com/microsoft/RD-Agent/commit/0b84709e59c7abb9754961cd17cc9673fcf508aa))
* using different chat model in different part ([#822](https://github.com/microsoft/RD-Agent/issues/822)) ([c052ea6](https://github.com/microsoft/RD-Agent/commit/c052ea6d1f8948183a4a6ebc873ec01b57373cce))
### Bug Fixes
* 'DSProposalV2ExpGen' object has no attribute 'COMPONENT_TASK_MAP… ([#950](https://github.com/microsoft/RD-Agent/issues/950)) ([e353895](https://github.com/microsoft/RD-Agent/commit/e353895251f231fee85abdcb1b22b022a577af77))
* adapting UI to mock trace ([#841](https://github.com/microsoft/RD-Agent/issues/841)) ([8a5754c](https://github.com/microsoft/RD-Agent/commit/8a5754c9b9c9410d0943aeed777a93c13422e54a))
* add missing semicolon after chmod in env shell command ([#955](https://github.com/microsoft/RD-Agent/issues/955)) ([1128eaa](https://github.com/microsoft/RD-Agent/commit/1128eaa89ec1dcab4a05ef50d64c7f7e6aae88a8))
* add time to timer when api timeout bug ([#826](https://github.com/microsoft/RD-Agent/issues/826)) ([f45d6ae](https://github.com/microsoft/RD-Agent/commit/f45d6ae6595c1c39b389485b637a0ae53ffc8782))
* add wait_retry to exp_gen v2 ([#783](https://github.com/microsoft/RD-Agent/issues/783)) ([b9fb7cf](https://github.com/microsoft/RD-Agent/commit/b9fb7cf4e3070062d91b5b67d0f10d6266b45142))
* adjust ds_trace lookup and add stderr redirect to mlebench command ([#853](https://github.com/microsoft/RD-Agent/issues/853)) ([4e53108](https://github.com/microsoft/RD-Agent/commit/4e53108e020db719b39cba3a67e0c6dae3de19cf))
* align competion_full_desc and scenario_all_desc, remove redundant info in problems proposal ([#808](https://github.com/microsoft/RD-Agent/issues/808)) ([76d8536](https://github.com/microsoft/RD-Agent/commit/76d8536d9ec53952383019306781d49cb3e9f75c))
* bug fix in timer start ([#807](https://github.com/microsoft/RD-Agent/issues/807)) ([9af7161](https://github.com/microsoft/RD-Agent/commit/9af7161eb57bdd2e24b072335e9d185951c32472))
* bug in problem identification ([#806](https://github.com/microsoft/RD-Agent/issues/806)) ([e1d5a29](https://github.com/microsoft/RD-Agent/commit/e1d5a2914046476f2f10d5884ed3c3ff956d65ff))
* conda error information ([#941](https://github.com/microsoft/RD-Agent/issues/941)) ([fd39a94](https://github.com/microsoft/RD-Agent/commit/fd39a947763fb4a9be87b907c399bebe384df505))
* default cost to NaN when calculation fails in LiteLLM backend ([#912](https://github.com/microsoft/RD-Agent/issues/912)) ([51a4048](https://github.com/microsoft/RD-Agent/commit/51a4048129cbfbc3b84bcf50fd8866fafb3e2da3))
* ds trace ([#929](https://github.com/microsoft/RD-Agent/issues/929)) ([127e441](https://github.com/microsoft/RD-Agent/commit/127e441602e21a46d6313ff39133ab8ca841937e))
* duplicate model names test in pipeline coder & runner ([#763](https://github.com/microsoft/RD-Agent/issues/763)) ([be3ee9d](https://github.com/microsoft/RD-Agent/commit/be3ee9da9882edda3c06ff7d1099d1bbda2203c3))
* filter system metadata dirs and init missing DSTrace attribute ([#946](https://github.com/microsoft/RD-Agent/issues/946)) ([10050ef](https://github.com/microsoft/RD-Agent/commit/10050ef368ae7ec07cbf20ac4e52e21c2875eaab))
* fix a bug in docker result extraction ([#824](https://github.com/microsoft/RD-Agent/issues/824)) ([e1c0f98](https://github.com/microsoft/RD-Agent/commit/e1c0f9826abcbc11dda215a600a2637c9ac6e984))
* fix competition metric direction ([#784](https://github.com/microsoft/RD-Agent/issues/784)) ([3be0057](https://github.com/microsoft/RD-Agent/commit/3be0057556f46c899065ee1c7f9bafe33e79249c))
* fix model input shape bug and costeer_model bug ([#821](https://github.com/microsoft/RD-Agent/issues/821)) ([b34bd89](https://github.com/microsoft/RD-Agent/commit/b34bd895d6d9c326aab85856a15be0cb72b2c4c8))
* fix some minor bugs ([#758](https://github.com/microsoft/RD-Agent/issues/758)) ([963f96e](https://github.com/microsoft/RD-Agent/commit/963f96e5596bee04074135c2a0e31a8adc39ad8c))
* fix some minor bugs in qlib scenario ([#817](https://github.com/microsoft/RD-Agent/issues/817)) ([79962a7](https://github.com/microsoft/RD-Agent/commit/79962a7ca40c77a3997a68da9ad1b5ab16728483))
* fix the bug in the regular expression matching for stdout ([#890](https://github.com/microsoft/RD-Agent/issues/890)) ([ee57e37](https://github.com/microsoft/RD-Agent/commit/ee57e37a22af874b262c033d1606dbe7799706db))
* fix the bug of Exceed-LLM-Context in online merge of multi-trace ([#892](https://github.com/microsoft/RD-Agent/issues/892)) ([f760a3e](https://github.com/microsoft/RD-Agent/commit/f760a3eff7bd927a31e4958ed2f706312e83e3e3))
* fix the problems weights bug ([#898](https://github.com/microsoft/RD-Agent/issues/898)) ([013d79f](https://github.com/microsoft/RD-Agent/commit/013d79f12060e908aeb57c3eb1bb56eea86df086))
* fixed CI execution failures caused by document builds ([#857](https://github.com/microsoft/RD-Agent/issues/857)) ([5c116b2](https://github.com/microsoft/RD-Agent/commit/5c116b24ce727f6ed9ef39d5aa5b60442038c344))
* get_metric_direction for aerial-cactus-identification ([#970](https://github.com/microsoft/RD-Agent/issues/970)) ([70dc62d](https://github.com/microsoft/RD-Agent/commit/70dc62de5fbd4272ecda1b6fcbcf898b3624a991))
* import path of T ([#787](https://github.com/microsoft/RD-Agent/issues/787)) ([ac008a6](https://github.com/microsoft/RD-Agent/commit/ac008a61d03b4737ab3d994024e922839d8f3fe1))
* improve eval alignment check (e.g. small-scale finetuning) ([#802](https://github.com/microsoft/RD-Agent/issues/802)) ([d391578](https://github.com/microsoft/RD-Agent/commit/d3915788082de640a4ce1eea6d2e607319b89c3e))
* improve file tree and _walk symlink handling ([#877](https://github.com/microsoft/RD-Agent/issues/877)) ([516cb69](https://github.com/microsoft/RD-Agent/commit/516cb69357483ddd99f84b221a056d8491c34f9b))
* log info ([#965](https://github.com/microsoft/RD-Agent/issues/965)) ([f1dbc21](https://github.com/microsoft/RD-Agent/commit/f1dbc2100498e22c8e5edbb2e4563c99c3d54775))
* main bug ([#938](https://github.com/microsoft/RD-Agent/issues/938)) ([c6d34d6](https://github.com/microsoft/RD-Agent/commit/c6d34d67b8aedf5496bf6a875915ce657fc58448))
* non-existent variable in test_eval.py ([#847](https://github.com/microsoft/RD-Agent/issues/847)) ([4948c38](https://github.com/microsoft/RD-Agent/commit/4948c38560f4cf021d9354b201b22dfa5ccb9441))
* refine feedback prompt ([#901](https://github.com/microsoft/RD-Agent/issues/901)) ([12bb2c4](https://github.com/microsoft/RD-Agent/commit/12bb2c4a1494b9aa29962905abb5e433a60eb716))
* refine the time/memory constraints prompt in hypothesis proposal ([#856](https://github.com/microsoft/RD-Agent/issues/856)) ([51ce8ef](https://github.com/microsoft/RD-Agent/commit/51ce8ef84b4fe6590ce20599a56eee596f2f04e6))
* Set PYTHONPATH in env.run_ret_code call in FBWorkspace class ([#755](https://github.com/microsoft/RD-Agent/issues/755)) ([68b5018](https://github.com/microsoft/RD-Agent/commit/68b501889caca754f27b57d9ab6f72184e93b15c))
* task_gen for better understanding ([#752](https://github.com/microsoft/RD-Agent/issues/752)) ([6bfc1e5](https://github.com/microsoft/RD-Agent/commit/6bfc1e570449ee69ac110a4ced9a7cecbc0e6a73))
* trace list bug ([#852](https://github.com/microsoft/RD-Agent/issues/852)) ([32cdc57](https://github.com/microsoft/RD-Agent/commit/32cdc575bde103d71a358d4d99bd413076328ebd))
* typo in workflow ([#861](https://github.com/microsoft/RD-Agent/issues/861)) ([0e54c9f](https://github.com/microsoft/RD-Agent/commit/0e54c9fe41d25a4cc45ab9e61bb2c2c01b854751))
* update DS env setup with competition volume and timeout ([#878](https://github.com/microsoft/RD-Agent/issues/878)) ([816ada0](https://github.com/microsoft/RD-Agent/commit/816ada096afabe90578672b0e61b656802a30b62))
* update feedback.py ([#772](https://github.com/microsoft/RD-Agent/issues/772)) ([133778c](https://github.com/microsoft/RD-Agent/commit/133778c67ee3349f1c2fe029bcf6a9ee14568efe))
* update metric direction to return bool ([#791](https://github.com/microsoft/RD-Agent/issues/791)) ([0bf365e](https://github.com/microsoft/RD-Agent/commit/0bf365e7830aa86d2350b9d1c47410af46b3a7e8))
* update runner max loop to 1 in DS scenario ([#820](https://github.com/microsoft/RD-Agent/issues/820)) ([3da378e](https://github.com/microsoft/RD-Agent/commit/3da378e986e8b776a17dbc694d29ef211192ed3e))
* use fallback messages for missing submission and scores files ([#882](https://github.com/microsoft/RD-Agent/issues/882)) ([898fdea](https://github.com/microsoft/RD-Agent/commit/898fdeae80801d537ebc5c4a3b7df9de74c3403a))
* use simple stdout and stderr ([#966](https://github.com/microsoft/RD-Agent/issues/966)) ([0b1c445](https://github.com/microsoft/RD-Agent/commit/0b1c445f1f0c212887ffff9f8fac44236df3607c))
* use trace count as index ([#909](https://github.com/microsoft/RD-Agent/issues/909)) ([b87de56](https://github.com/microsoft/RD-Agent/commit/b87de56e54b206b3aada53850804474eff80b96d))
* wrong variable in test_eval.py ([#846](https://github.com/microsoft/RD-Agent/issues/846)) ([808ea6c](https://github.com/microsoft/RD-Agent/commit/808ea6cba541e60c35dd283cee9098ce46f2a59e))
## [0.4.0](https://github.com/microsoft/RD-Agent/compare/v0.3.0...v0.4.0) (2025-04-04)
### Features
* (Kaggle) add base template for competition: tabular-playground-series-may-2022 ([#481](https://github.com/microsoft/RD-Agent/issues/481)) ([f3405ca](https://github.com/microsoft/RD-Agent/commit/f3405ca732eb0ddca8e18ea72f69cbd86055c4ab))
* a unified CoSTEER to fit more scenarios ([#491](https://github.com/microsoft/RD-Agent/issues/491)) ([cddbd02](https://github.com/microsoft/RD-Agent/commit/cddbd02e3ad3ccf6ad01443777319dc5c7eb08a7))
* add a new competition ([#474](https://github.com/microsoft/RD-Agent/issues/474)) ([2fc0d77](https://github.com/microsoft/RD-Agent/commit/2fc0d77c485a31f647e21f4578e2e326f7032964))
* add a tool to enable saving workspace files into a specific folder ([#728](https://github.com/microsoft/RD-Agent/issues/728)) ([bca864b](https://github.com/microsoft/RD-Agent/commit/bca864b7edeafe3f88405efb695ca8acad6252f8))
* add baseline score stat ([#590](https://github.com/microsoft/RD-Agent/issues/590)) ([2948026](https://github.com/microsoft/RD-Agent/commit/2948026c390d067b643f8c8247c1447f1dc023e4))
* add configurable volume mode for Docker volumes in env.py ([#537](https://github.com/microsoft/RD-Agent/issues/537)) ([642a022](https://github.com/microsoft/RD-Agent/commit/642a02239431411b91959f23e69b454997ca75d5))
* add constraint labels for semantic search ([#680](https://github.com/microsoft/RD-Agent/issues/680)) ([0584cfc](https://github.com/microsoft/RD-Agent/commit/0584cfcd13ca1a62c85390ea2ee7574370748d31))
* add cross validation to workflow ([#700](https://github.com/microsoft/RD-Agent/issues/700)) ([82e9b00](https://github.com/microsoft/RD-Agent/commit/82e9b00be62b01673353a7aaa3ab0e2e3ecaf3ca))
* add describe_data_folder_v2 ([#738](https://github.com/microsoft/RD-Agent/issues/738)) ([bc8e846](https://github.com/microsoft/RD-Agent/commit/bc8e8460e0246321792ff3347b1b8905416ad075))
* add do_truncate control for the load function ([#656](https://github.com/microsoft/RD-Agent/issues/656)) ([2b960a5](https://github.com/microsoft/RD-Agent/commit/2b960a58dfdeba69522a0f72ecf0975bb6ae87ee))
* add do_truncate control for the load function ([#656](https://github.com/microsoft/RD-Agent/issues/656)) ([2b960a5](https://github.com/microsoft/RD-Agent/commit/2b960a58dfdeba69522a0f72ecf0975bb6ae87ee))
* add eda to data science scenario ([#639](https://github.com/microsoft/RD-Agent/issues/639)) ([35aa479](https://github.com/microsoft/RD-Agent/commit/35aa479f00edf118d43ec228e0a84c155332957a))
* add hypothesis guidelines and rule-based ranking ([#746](https://github.com/microsoft/RD-Agent/issues/746)) ([c077b82](https://github.com/microsoft/RD-Agent/commit/c077b8239cc72904c4bc450845ed2a11aa5445f0))
* Add line length limit to shrink_text function and settings ([#715](https://github.com/microsoft/RD-Agent/issues/715)) ([75ed5e1](https://github.com/microsoft/RD-Agent/commit/75ed5e1c2ce1bf20bb55190c10a4134e04694d2b))
* add loop_n parameter to the main loop ([#611](https://github.com/microsoft/RD-Agent/issues/611)) ([778c166](https://github.com/microsoft/RD-Agent/commit/778c166962250e3b9e7ad85de37f62297d370b45))
* add max time config to costeer in data science ([#645](https://github.com/microsoft/RD-Agent/issues/645)) ([534686c](https://github.com/microsoft/RD-Agent/commit/534686c2ba7d9fa979c0762ad3177c36f6d7f4cb))
* add mlebench submission validator ([#545](https://github.com/microsoft/RD-Agent/issues/545)) ([712d94a](https://github.com/microsoft/RD-Agent/commit/712d94a7d6f22187fc3d18bd434e71ec6997aa9f))
* add model removal and adjust some framework logic ([#681](https://github.com/microsoft/RD-Agent/issues/681)) ([1edf881](https://github.com/microsoft/RD-Agent/commit/1edf881c63512d351c0dd074d7a1c0965ff3119b))
* add output_path to load function of LoopBase ([#628](https://github.com/microsoft/RD-Agent/issues/628)) ([dd33726](https://github.com/microsoft/RD-Agent/commit/dd33726ac5de75dc2030d193d457d59490b3361e))
* add pipeline coder ([#742](https://github.com/microsoft/RD-Agent/issues/742)) ([759f295](https://github.com/microsoft/RD-Agent/commit/759f295dbf1224e177006e72d694e42dd6f372b6))
* add rank into report (mle_summary) ([#665](https://github.com/microsoft/RD-Agent/issues/665)) ([13f7922](https://github.com/microsoft/RD-Agent/commit/13f7922aaae9e4143aac4ad08ec1c556c2faf04e))
* add restart and fix unzip ([#538](https://github.com/microsoft/RD-Agent/issues/538)) ([ed2c7d1](https://github.com/microsoft/RD-Agent/commit/ed2c7d175f1f44ca06ad7a63b08da12f6c4df9ab))
* add retry mechanism with wait_retry decorator and refactor diff generation ([#572](https://github.com/microsoft/RD-Agent/issues/572)) ([de1cd72](https://github.com/microsoft/RD-Agent/commit/de1cd72f068ebd1e1bd5bc2ad2b12ae484d54831))
* add the shape of the CSV to the dataset description ([#561](https://github.com/microsoft/RD-Agent/issues/561)) ([a10c881](https://github.com/microsoft/RD-Agent/commit/a10c881bd86796e6167257ad26dd165f7e46d813))
* add timeout settings and cleanup step in data science runner ([#539](https://github.com/microsoft/RD-Agent/issues/539)) ([295abd5](https://github.com/microsoft/RD-Agent/commit/295abd56f7b58055bd27b247dfed47eb85e9b0cd))
* add type checker to api backend & align litellm and old backend ([#647](https://github.com/microsoft/RD-Agent/issues/647)) ([d38eae9](https://github.com/microsoft/RD-Agent/commit/d38eae986a0ba69d71288fa09fcc21e227551a02))
* align mlebench data and evaluation & several fixes on kaggle workflow ([#477](https://github.com/microsoft/RD-Agent/issues/477)) ([f6c522b](https://github.com/microsoft/RD-Agent/commit/f6c522b651db3c1f6af6815347589917f46e433a))
* **backend:** integrate LiteLLM API Backend ([#564](https://github.com/microsoft/RD-Agent/issues/564)) ([f477687](https://github.com/microsoft/RD-Agent/commit/f4776879c76a213d53875b307c94be1ea5cfd9ba))
* base data science scenario UI ([#525](https://github.com/microsoft/RD-Agent/issues/525)) ([39917b3](https://github.com/microsoft/RD-Agent/commit/39917b354b22a8488a17396fe2245cb41e3def03))
* condaenv & full docker env ([#668](https://github.com/microsoft/RD-Agent/issues/668)) ([084dd6d](https://github.com/microsoft/RD-Agent/commit/084dd6d748a89492ea0888acb316b9bb9efeb62f))
* diff mode fix ([#569](https://github.com/microsoft/RD-Agent/issues/569)) ([0c509f5](https://github.com/microsoft/RD-Agent/commit/0c509f599ce19303b44d8192ec3eb634c24992d6))
* display LLM prompt ([#676](https://github.com/microsoft/RD-Agent/issues/676)) ([8c93bba](https://github.com/microsoft/RD-Agent/commit/8c93bba82e185edcf4204cc574df5f41bcdfa9d2))
* Dynamically find and use sample submission file in eval tests ([#542](https://github.com/microsoft/RD-Agent/issues/542)) ([5f12b44](https://github.com/microsoft/RD-Agent/commit/5f12b44c89dd26b250e914192f9beb2da38fb3ab))
* end-to-end optimization ([#473](https://github.com/microsoft/RD-Agent/issues/473)) ([d41343a](https://github.com/microsoft/RD-Agent/commit/d41343a63d87bf3479f5ec30745ea788580495bf))
* Enhance eval script with file cleanup and detailed submission checks ([#529](https://github.com/microsoft/RD-Agent/issues/529)) ([cf2ff92](https://github.com/microsoft/RD-Agent/commit/cf2ff9213d3a8b0fad64df7cae0c35f996d72e27))
* exclude invalid session log folder ([#554](https://github.com/microsoft/RD-Agent/issues/554)) ([fa86e4d](https://github.com/microsoft/RD-Agent/commit/fa86e4d1805000e0e5779c662ccbb5273fda623c))
* improve the framework's ability to adaptively adjust the model ([#629](https://github.com/microsoft/RD-Agent/issues/629)) ([93806f3](https://github.com/microsoft/RD-Agent/commit/93806f33a1e0f29a125e29303d4b984a9817c3c0))
* independent use_azure_token_provider on chat and embedding ([#452](https://github.com/microsoft/RD-Agent/issues/452)) ([d223004](https://github.com/microsoft/RD-Agent/commit/d223004917692e231b251330cbc8676081d5a10d))
* integrate azure deepseek r1 ([#591](https://github.com/microsoft/RD-Agent/issues/591)) ([e79ce5c](https://github.com/microsoft/RD-Agent/commit/e79ce5c38539138abe04eb9809fbde437e97bbb7))
* kaggle refactor ([#489](https://github.com/microsoft/RD-Agent/issues/489)) ([1b057d0](https://github.com/microsoft/RD-Agent/commit/1b057d0d63a861fba4b3cb59c6c5fc1a0e3da383))
* **kaggle:** several updates in kaggle scenarios ([#476](https://github.com/microsoft/RD-Agent/issues/476)) ([245d211](https://github.com/microsoft/RD-Agent/commit/245d211dcbfb18ebcc554247a0e3a8dbecf6f3bd))
* loader prompt & simplify YAML loading and update data loader specifications ([#736](https://github.com/microsoft/RD-Agent/issues/736)) ([86f8bbf](https://github.com/microsoft/RD-Agent/commit/86f8bbf15895e7c198f9bc395d055ca5f02a5bb6))
* make spec optional ([#719](https://github.com/microsoft/RD-Agent/issues/719)) ([a16b70f](https://github.com/microsoft/RD-Agent/commit/a16b70ff34c66d7e1c4c7ff5236eca8e7d8abea9))
* Make system prompt role customizable in LLM settings ([#632](https://github.com/microsoft/RD-Agent/issues/632)) ([e4acd92](https://github.com/microsoft/RD-Agent/commit/e4acd92cc5eec6db5c29cb2d4788020fb89099b7))
* multi log folder, replace "epxx" in workspace path ([#555](https://github.com/microsoft/RD-Agent/issues/555)) ([8a69c9c](https://github.com/microsoft/RD-Agent/commit/8a69c9c9630860c9b644356e1f71654aea222328))
* new exp gen v2 implementation ([#725](https://github.com/microsoft/RD-Agent/issues/725)) ([5dcc2d5](https://github.com/microsoft/RD-Agent/commit/5dcc2d5fa63bbe9ae8c4817d9b40b77600440edb))
* new-york-city-taxi-fare-prediction_template ([#488](https://github.com/microsoft/RD-Agent/issues/488)) ([a9caab7](https://github.com/microsoft/RD-Agent/commit/a9caab7bc5dc86f395a008e523355922137aef17))
* out spec change for o1-preview ([#666](https://github.com/microsoft/RD-Agent/issues/666)) ([22894bd](https://github.com/microsoft/RD-Agent/commit/22894bdbee26b9cad73646d2975857787e515f75))
* refactor for general data science ([#498](https://github.com/microsoft/RD-Agent/issues/498)) ([7002dc4](https://github.com/microsoft/RD-Agent/commit/7002dc4981a4f72096b438d2fe4fd9ff268c54f3))
* refine logic for qlib_factor_from_report ([#463](https://github.com/microsoft/RD-Agent/issues/463)) ([21348d8](https://github.com/microsoft/RD-Agent/commit/21348d89e0e0eec1b4fab4e7a497f1eb34b8fe72))
* run benchmark on gpt-4o & llama 3.1 ([#497](https://github.com/microsoft/RD-Agent/issues/497)) ([64af0b5](https://github.com/microsoft/RD-Agent/commit/64af0b5529b687cce8b5b7a1893946e15edca626))
* summary and UI update ([#581](https://github.com/microsoft/RD-Agent/issues/581)) ([efa51f9](https://github.com/microsoft/RD-Agent/commit/efa51f9c259a06fe219f3137f0a1005e50d2bfdd))
* template changes for some kaggle competitions ([#484](https://github.com/microsoft/RD-Agent/issues/484)) ([2e38000](https://github.com/microsoft/RD-Agent/commit/2e38000091030811fc081d72016c7bbadf7efd50))
* track and log accumulated completion cost in LiteLLMAPIBackend ([#727](https://github.com/microsoft/RD-Agent/issues/727)) ([b294a95](https://github.com/microsoft/RD-Agent/commit/b294a95e0b7b2ef96af355cebac92d9c87f3acab))
* update prompts and descriptions for data science components ([#731](https://github.com/microsoft/RD-Agent/issues/731)) ([c20e226](https://github.com/microsoft/RD-Agent/commit/c20e226c3e7771c9fcd1c879a8937e4694dc03eb))
* variable printing tool of data_science coder testing ([#658](https://github.com/microsoft/RD-Agent/issues/658)) ([116c061](https://github.com/microsoft/RD-Agent/commit/116c06190b01f0b621c021726a1be23458ab1154))
### Bug Fixes
* a default conf in scen qlib ([#503](https://github.com/microsoft/RD-Agent/issues/503)) ([d64a228](https://github.com/microsoft/RD-Agent/commit/d64a228525cbedd7687c1e06132eacd0d0647697))
* a small bug in exp_gen ([#606](https://github.com/microsoft/RD-Agent/issues/606)) ([f734dde](https://github.com/microsoft/RD-Agent/commit/f734dde0b0101e13f38151468c8ddf9e23af26ac))
* add check when retrying gen model codes ([#699](https://github.com/microsoft/RD-Agent/issues/699)) ([3b82f15](https://github.com/microsoft/RD-Agent/commit/3b82f159474087902d3c6007d370e3282b549015))
* add DSExperiment type check and directory validation in log proc… ([#535](https://github.com/microsoft/RD-Agent/issues/535)) ([f59b12c](https://github.com/microsoft/RD-Agent/commit/f59b12c9cc9afde82b74bc133797ff1396678627))
* add ensemble test, change to "use cross-validation if possible" in workflow spec ([#634](https://github.com/microsoft/RD-Agent/issues/634)) ([acc97a8](https://github.com/microsoft/RD-Agent/commit/acc97a8217253497afedcfa829902b4432e1031e))
* add force parameter for cache_with_pickle & using cache when get kaggle leaderboard ([#687](https://github.com/microsoft/RD-Agent/issues/687)) ([c8841e5](https://github.com/microsoft/RD-Agent/commit/c8841e590a925200859acba9fda4a17d4c3aa1c7))
* add metric name check for valid scores ([#724](https://github.com/microsoft/RD-Agent/issues/724)) ([acc2ffb](https://github.com/microsoft/RD-Agent/commit/acc2ffbde4df3b53654559d14cd035ee6be6b35e))
* add retry mechanism for GPU device check in DockerEnv ([#573](https://github.com/microsoft/RD-Agent/issues/573)) ([a780cfb](https://github.com/microsoft/RD-Agent/commit/a780cfb621dc487cc17072bfd4aedd7d581249ab))
* add scores.csv checking in ensemble_test ([#567](https://github.com/microsoft/RD-Agent/issues/567)) ([01808b4](https://github.com/microsoft/RD-Agent/commit/01808b47c314d1daffacc0a65e0ab934a1c41d65))
* add stdout context length setting and improve text shrinking logic ([#559](https://github.com/microsoft/RD-Agent/issues/559)) ([4ac26a6](https://github.com/microsoft/RD-Agent/commit/4ac26a65c1f18f7513480dd562566c8a96298aa7))
* align components' name ([#701](https://github.com/microsoft/RD-Agent/issues/701)) ([295a114](https://github.com/microsoft/RD-Agent/commit/295a1148c53d00b716b2d540573a7f43e7e2d762))
* auto continue small bug ([#598](https://github.com/microsoft/RD-Agent/issues/598)) ([75eaecf](https://github.com/microsoft/RD-Agent/commit/75eaecf36b9f70dfc2d7fedd35836acdb05f89d6))
* avoid try-except in ensemble eval prompts ([#637](https://github.com/microsoft/RD-Agent/issues/637)) ([5c58d6e](https://github.com/microsoft/RD-Agent/commit/5c58d6e524ef848024578033ab6d47bc9b220822))
* avoid warning for missing llama installation when not in use ([#509](https://github.com/microsoft/RD-Agent/issues/509)) ([5ec3422](https://github.com/microsoft/RD-Agent/commit/5ec342224c2c8c4cf591f1eae673e25b14218726))
* change devault to default ([#688](https://github.com/microsoft/RD-Agent/issues/688)) ([7f401cd](https://github.com/microsoft/RD-Agent/commit/7f401cd1c3b333285acf6d6e57654f4b9f0cb6c5))
* change ensemble test ([#622](https://github.com/microsoft/RD-Agent/issues/622)) ([5de3595](https://github.com/microsoft/RD-Agent/commit/5de35953ed0d3e2e1f4dff0e0522f2d6475079ec))
* change summary info of log folder ([#552](https://github.com/microsoft/RD-Agent/issues/552)) ([0eb258d](https://github.com/microsoft/RD-Agent/commit/0eb258d734e9a1280a238b9a6f63eb33047ee0a7))
* clarify an ambiguous explanation ([#705](https://github.com/microsoft/RD-Agent/issues/705)) ([5dbfc68](https://github.com/microsoft/RD-Agent/commit/5dbfc6859cbf6cc31932dae30cf05506108fc871))
* clarify cross_validation ([#644](https://github.com/microsoft/RD-Agent/issues/644)) ([906993e](https://github.com/microsoft/RD-Agent/commit/906993ef6482f88131d1af46f5bc66a77034b549))
* coder prompt & model test text ([#583](https://github.com/microsoft/RD-Agent/issues/583)) ([0a41227](https://github.com/microsoft/RD-Agent/commit/0a41227f267050feaeeb47ddd4d749643eb9f198))
* correct the configuration inheritance relationship ([#671](https://github.com/microsoft/RD-Agent/issues/671)) ([30b1ff8](https://github.com/microsoft/RD-Agent/commit/30b1ff8e1ce59b741e0b81481962063014641c0b))
* default emb model ([#702](https://github.com/microsoft/RD-Agent/issues/702)) ([4329a72](https://github.com/microsoft/RD-Agent/commit/4329a722832a201b3fa6f9d8f9d8d46f78110410))
* direct_exp_gen to json_target_type in DSExpGen class ([#661](https://github.com/microsoft/RD-Agent/issues/661)) ([428b74a](https://github.com/microsoft/RD-Agent/commit/428b74a988157ea864ebb40e828bd9f67589c863))
* docker error will trigger retry and data science runner loop set to 3 ([#602](https://github.com/microsoft/RD-Agent/issues/602)) ([ad785e0](https://github.com/microsoft/RD-Agent/commit/ad785e03d5db05d9191d5e772e184532835a787b))
* ensure expected type ([#593](https://github.com/microsoft/RD-Agent/issues/593)) ([098a9a6](https://github.com/microsoft/RD-Agent/commit/098a9a6618f70fa8dd276b9014b9e7ba9621553b))
* filter empty log traces in ds UI ([#533](https://github.com/microsoft/RD-Agent/issues/533)) ([1a2057c](https://github.com/microsoft/RD-Agent/commit/1a2057c9fc11edc4637f0baaa6dd226eb049c36e))
* fix a bug in cross validation ([#618](https://github.com/microsoft/RD-Agent/issues/618)) ([05a4f10](https://github.com/microsoft/RD-Agent/commit/05a4f101e0b64b860ad03294619b2350004657e8))
* fix a bug in ensemble test script ([#713](https://github.com/microsoft/RD-Agent/issues/713)) ([ad32100](https://github.com/microsoft/RD-Agent/commit/ad321000acbd9291d22fe03a9c60e57c70511c73))
* fix a bug in initial tasks ([#635](https://github.com/microsoft/RD-Agent/issues/635)) ([edb552e](https://github.com/microsoft/RD-Agent/commit/edb552ed283119444f357fbd0b6170b2ad97712a))
* fix a bug in kaggle conf ([#459](https://github.com/microsoft/RD-Agent/issues/459)) ([b4ed32b](https://github.com/microsoft/RD-Agent/commit/b4ed32b17ef07d8557450063765585a48d5fcd32))
* fix a bug in progress_bar filter ([#712](https://github.com/microsoft/RD-Agent/issues/712)) ([ba5a84d](https://github.com/microsoft/RD-Agent/commit/ba5a84dee59c39cc2a8c0d428a82da1f899ce537))
* fix a bug in proposal (add last loop's exception to last task desc) ([#596](https://github.com/microsoft/RD-Agent/issues/596)) ([419186f](https://github.com/microsoft/RD-Agent/commit/419186ffb985fe5a0aa0f7fe59c7a223e355492e))
* fix a bug in regular expression exception processing ([#734](https://github.com/microsoft/RD-Agent/issues/734)) ([67d3702](https://github.com/microsoft/RD-Agent/commit/67d37027bbcd7294a5890a350fe16fe78e0dfa77))
* fix a bug in threshold score display ([#592](https://github.com/microsoft/RD-Agent/issues/592)) ([0b0a2dc](https://github.com/microsoft/RD-Agent/commit/0b0a2dc512a5560a66464ad49de25d362d0dc17e))
* fix a bug related to model_name in ensemble ([#692](https://github.com/microsoft/RD-Agent/issues/692)) ([c6ce473](https://github.com/microsoft/RD-Agent/commit/c6ce4733f32578298abe0b60f9d82611b793cc09))
* fix a minor bug ([#694](https://github.com/microsoft/RD-Agent/issues/694)) ([1405d8d](https://github.com/microsoft/RD-Agent/commit/1405d8dafd99ecde6f3ba9dd76133d8830d03b47))
* fix an error in model_coder prompt ([#690](https://github.com/microsoft/RD-Agent/issues/690)) ([4528826](https://github.com/microsoft/RD-Agent/commit/452882674e915dbd9e3399c26c70ce5bb86d012c))
* fix combined_factors_df.pkl not loading in docker ([#697](https://github.com/microsoft/RD-Agent/issues/697)) ([3984b99](https://github.com/microsoft/RD-Agent/commit/3984b995aa74318b40de7712e100d4de5cc95b11))
* fix docs build error ([#711](https://github.com/microsoft/RD-Agent/issues/711)) ([c9e1d32](https://github.com/microsoft/RD-Agent/commit/c9e1d32d6b63560350cc7cb799c3a908e2c04e42))
* fix ExtendedSettingsConfigDict does not work ([#660](https://github.com/microsoft/RD-Agent/issues/660)) ([3a877f3](https://github.com/microsoft/RD-Agent/commit/3a877f383b908da8d027560714030b201946bb76))
* fix kaggle templates path error ([#747](https://github.com/microsoft/RD-Agent/issues/747)) ([3b3f504](https://github.com/microsoft/RD-Agent/commit/3b3f5041514baf741fe2d4613fa651fb5d9c002d))
* fix KeyError direct_exp_gen ([#735](https://github.com/microsoft/RD-Agent/issues/735)) ([7200682](https://github.com/microsoft/RD-Agent/commit/7200682ac4e60d3910c29a4f7c4a37b3d24e4224))
* fix some bugs (ensemble output, HPO, model tuning) ([#648](https://github.com/microsoft/RD-Agent/issues/648)) ([818ee29](https://github.com/microsoft/RD-Agent/commit/818ee29f8e5d4765b9801463b85b42ee9516ec33))
* fix some bugs in the ensemble component ([#595](https://github.com/microsoft/RD-Agent/issues/595)) ([c0990ab](https://github.com/microsoft/RD-Agent/commit/c0990abb06c73ae062d9a50f50cdfd6d04aded22))
* fix some bugs in workflow unit test ([#624](https://github.com/microsoft/RD-Agent/issues/624)) ([f845dcc](https://github.com/microsoft/RD-Agent/commit/f845dcc0ee1b059b8b32485ad46bb90c7ae0fa78))
* fix some description errors in direct_exp_gen ([#698](https://github.com/microsoft/RD-Agent/issues/698)) ([dfaacb6](https://github.com/microsoft/RD-Agent/commit/dfaacb6d06e5d5f55e950d7177570d1efebf958f))
* fix some minor bugs and add AutoML & cross-validation ([#604](https://github.com/microsoft/RD-Agent/issues/604)) ([18c5ef2](https://github.com/microsoft/RD-Agent/commit/18c5ef268d40efe7bb9ee18aa0d250732bdda6fa))
* fix submission file search and add TODO in env.py ([#544](https://github.com/microsoft/RD-Agent/issues/544)) ([54d930e](https://github.com/microsoft/RD-Agent/commit/54d930e91e629f0fc2f8bdd0d0d62fcad1e99a9c))
* fix task return dict with wrong format ([#558](https://github.com/microsoft/RD-Agent/issues/558)) ([2008244](https://github.com/microsoft/RD-Agent/commit/20082440a249dd0e5a7026c2d98c9de0288dd400))
* fix the errors in the coder and evaluator of the five components ([#576](https://github.com/microsoft/RD-Agent/issues/576)) ([c487f83](https://github.com/microsoft/RD-Agent/commit/c487f835b651cdc40b95bbbe4efcb9a617be9e40))
* handle division by zero in percentage calculations ([#550](https://github.com/microsoft/RD-Agent/issues/550)) ([de16c91](https://github.com/microsoft/RD-Agent/commit/de16c915e1716ef8cee43ce41069ea1a09cf1f24))
* handle invalid regex patterns in filter_progress_bar function ([#579](https://github.com/microsoft/RD-Agent/issues/579)) ([b0daee0](https://github.com/microsoft/RD-Agent/commit/b0daee0d90e193ca1d028e01c31ebf368af89601))
* Handle ValueError when resolving relative path for uri ([#585](https://github.com/microsoft/RD-Agent/issues/585)) ([4c7765a](https://github.com/microsoft/RD-Agent/commit/4c7765a12bda5dcfd9af72b292853d9bc28c5baf))
* include data information in cache key generation ([#566](https://github.com/microsoft/RD-Agent/issues/566)) ([26dda46](https://github.com/microsoft/RD-Agent/commit/26dda4682b7b643c164589057cb568a4d9e55e17))
* keep some txt files ([#557](https://github.com/microsoft/RD-Agent/issues/557)) ([54aba85](https://github.com/microsoft/RD-Agent/commit/54aba851c9fa194e318d37700307df59e06c6c84))
* mle_score save problem ([#674](https://github.com/microsoft/RD-Agent/issues/674)) ([ca2e478](https://github.com/microsoft/RD-Agent/commit/ca2e478cf25c2c8511d5f027e32f8a98fc8e3a07))
* move docker timeout message to __run() ([#620](https://github.com/microsoft/RD-Agent/issues/620)) ([585f4f9](https://github.com/microsoft/RD-Agent/commit/585f4f96e09f70d00eb397c10bf49c09973111df))
* move mlebench check into runner ([#556](https://github.com/microsoft/RD-Agent/issues/556)) ([b0f7965](https://github.com/microsoft/RD-Agent/commit/b0f7965f650638273710302efee2e5da037368a2))
* move next_component_required logic to DSTrace class and implement it accurately ([#612](https://github.com/microsoft/RD-Agent/issues/612)) ([c20d311](https://github.com/microsoft/RD-Agent/commit/c20d311792f33b2ccccb466c6ec3155ff8be3213))
* patching weird azure deployment ([#494](https://github.com/microsoft/RD-Agent/issues/494)) ([89c50ae](https://github.com/microsoft/RD-Agent/commit/89c50aee2ec8bfd1cb23767ddf7dcdd023daac8b))
* qlib and other scenario bugs ([#636](https://github.com/microsoft/RD-Agent/issues/636)) ([98de31d](https://github.com/microsoft/RD-Agent/commit/98de31d4e577c8c450c9694f73a755c19af571f7))
* refine prompt to generate the most simple task in init stage ([#546](https://github.com/microsoft/RD-Agent/issues/546)) ([9d6feed](https://github.com/microsoft/RD-Agent/commit/9d6feed28ce034db48482d8d9741ef8c72f4bddc))
* replace API call with build_cls_from_json_with_retry function ([#548](https://github.com/microsoft/RD-Agent/issues/548)) ([eb72a47](https://github.com/microsoft/RD-Agent/commit/eb72a47fbf9c88dacea9691b8d7e92610492d190))
* replace func "len()" in ensemble test code to support various data type ([#739](https://github.com/microsoft/RD-Agent/issues/739)) ([ab9c7b9](https://github.com/microsoft/RD-Agent/commit/ab9c7b955f78c5de7ec08a6c1a012a76badbdd0e))
* return 1D embedding if create_embedding receives a string input ([#670](https://github.com/microsoft/RD-Agent/issues/670)) ([4a9c318](https://github.com/microsoft/RD-Agent/commit/4a9c3180ae4a4b043b1b4a89f51ee69cb6843142))
* rich.print error when some control char in output ([#684](https://github.com/microsoft/RD-Agent/issues/684)) ([ec0cb2a](https://github.com/microsoft/RD-Agent/commit/ec0cb2a032824023dcd04a3acc93202471d1f90a))
* Runnable on first complete & Rename method to next_incomplete_component for clarity ([#615](https://github.com/microsoft/RD-Agent/issues/615)) ([93d9f63](https://github.com/microsoft/RD-Agent/commit/93d9f63369a78f78e1a67ab548923bb994d1d3b4))
* runner COSTEER evaluator ([#693](https://github.com/microsoft/RD-Agent/issues/693)) ([6a379ec](https://github.com/microsoft/RD-Agent/commit/6a379ec9b84d4e4944f1e412347aae4f5a93d476))
* save only one mle_score pkl for a running exp ([#675](https://github.com/microsoft/RD-Agent/issues/675)) ([f87ab67](https://github.com/microsoft/RD-Agent/commit/f87ab676b73cce82bd9f997ac779e31c571b53c4))
* Set default value for 'entry' parameter in Env.run method ([#643](https://github.com/microsoft/RD-Agent/issues/643)) ([e50d242](https://github.com/microsoft/RD-Agent/commit/e50d2424b849e4181d6ca02e9cace90236665924))
* sort file name for cache reproduction ([#588](https://github.com/microsoft/RD-Agent/issues/588)) ([7158410](https://github.com/microsoft/RD-Agent/commit/7158410fbfdd84052f9a69cf1e04e09ac07ca598))
* sota comparison logic ([#608](https://github.com/microsoft/RD-Agent/issues/608)) ([3575372](https://github.com/microsoft/RD-Agent/commit/35753722c0800d62855faeab996d513e62cfe7de))
* target json type & round ([#662](https://github.com/microsoft/RD-Agent/issues/662)) ([58cb58f](https://github.com/microsoft/RD-Agent/commit/58cb58f966a1db26f5ea9662a54ba12bc921ee24))
* templates bug ([#456](https://github.com/microsoft/RD-Agent/issues/456)) ([434a868](https://github.com/microsoft/RD-Agent/commit/434a8687eeda77e27b4938fb19694c15858ee446))
* trace summary df showing in dsapp ([#551](https://github.com/microsoft/RD-Agent/issues/551)) ([177096d](https://github.com/microsoft/RD-Agent/commit/177096d55fecb8c7dab9650ef8f5a31024cd4c1c))
* unzip kaggle data ([#464](https://github.com/microsoft/RD-Agent/issues/464)) ([3a9fc8e](https://github.com/microsoft/RD-Agent/commit/3a9fc8e73337d3757267b6f4482499499a1b6792))
## [0.3.0](https://github.com/microsoft/RD-Agent/compare/v0.2.1...v0.3.0) (2024-10-21)
### Features
* add a new template for kaggle ([#289](https://github.com/microsoft/RD-Agent/issues/289)) ([eee3ab5](https://github.com/microsoft/RD-Agent/commit/eee3ab5b25198224826cb7a8a17eab28bd5d1f7d))
* add download submission.csv button for kaggle scenario ([#317](https://github.com/microsoft/RD-Agent/issues/317)) ([dcdcbe4](https://github.com/microsoft/RD-Agent/commit/dcdcbe46b4858bfb133ae3cca056e7f602d5cf63))
* add kaggle command ([#271](https://github.com/microsoft/RD-Agent/issues/271)) ([0938394](https://github.com/microsoft/RD-Agent/commit/0938394b7084ffbf3294d8c23d2d34bf7322ca0b))
* add kaggle tpl: feedback-prize ([#331](https://github.com/microsoft/RD-Agent/issues/331)) ([a288e39](https://github.com/microsoft/RD-Agent/commit/a288e399e6b0beec62729bd7d46b98a55de5ab79))
* add more templates for kaggle ([#291](https://github.com/microsoft/RD-Agent/issues/291)) ([da752ec](https://github.com/microsoft/RD-Agent/commit/da752ec806e6f5f5679bc27ac1c072ed9a319251))
* add normal rag into framework ([#360](https://github.com/microsoft/RD-Agent/issues/360)) ([91b0b1f](https://github.com/microsoft/RD-Agent/commit/91b0b1f66c3c1bf757cb64c4cfbdcaafe59eab74))
* add qlib_factor_strategy ([#307](https://github.com/microsoft/RD-Agent/issues/307)) ([f8f59ff](https://github.com/microsoft/RD-Agent/commit/f8f59ff0a1be4428a68c8c27f220aabad0b6c9f0))
* Add ranking in kaggle scenario ([#401](https://github.com/microsoft/RD-Agent/issues/401)) ([b16b4be](https://github.com/microsoft/RD-Agent/commit/b16b4beb402e0c27dfb39ee9d2a120f1b56d447c))
* Add runtime measurement for each step and loop in RDLoop. ([#281](https://github.com/microsoft/RD-Agent/issues/281)) ([83058c8](https://github.com/microsoft/RD-Agent/commit/83058c864ceeec413dd29bf501030d5a7bd34679))
* add s3e11 kaggle template ([#324](https://github.com/microsoft/RD-Agent/issues/324)) ([8c57524](https://github.com/microsoft/RD-Agent/commit/8c57524bead1c8f655a08763d608eb7a6dd5975e))
* Added RepoAnalyzer to empower auto-summary of a workspace ([#264](https://github.com/microsoft/RD-Agent/issues/264)) ([0bd349a](https://github.com/microsoft/RD-Agent/commit/0bd349af50b9b881ba1774bdeb4d723529ef2aa9))
* Added support for loading and storing RAG in Kaggle scenarios. ([#269](https://github.com/microsoft/RD-Agent/issues/269)) ([c4895de](https://github.com/microsoft/RD-Agent/commit/c4895de83f1ed000e563d42b3468a6bd9e5a4965))
* announce Discord and WeChat ([#367](https://github.com/microsoft/RD-Agent/issues/367)) ([acac507](https://github.com/microsoft/RD-Agent/commit/acac5078a103b71afa6bd6c053b0766a6a7e609d))
* auto submit result after one kaggle RDLoop ([#345](https://github.com/microsoft/RD-Agent/issues/345)) ([ab55d70](https://github.com/microsoft/RD-Agent/commit/ab55d7052b53a928b84dc5d5d0d2999d90ca9056))
* better feedback & evaluation ([#346](https://github.com/microsoft/RD-Agent/issues/346)) ([cc9a8c1](https://github.com/microsoft/RD-Agent/commit/cc9a8c1eab3ca89f8c1e5de4a2bb4e7fcc0cc615))
* Dynamic scenario based on task ([#392](https://github.com/microsoft/RD-Agent/issues/392)) ([665a037](https://github.com/microsoft/RD-Agent/commit/665a037e4fd7326c450e3fa0d0605eea26fd9ef3))
* Factor Implement Search Enhancement ([#294](https://github.com/microsoft/RD-Agent/issues/294)) ([4ecf25f](https://github.com/microsoft/RD-Agent/commit/4ecf25f0acf2389a172b14d3dab20895daf2ab89))
* Feature selection v3 to support all actions ([#280](https://github.com/microsoft/RD-Agent/issues/280)) ([0047641](https://github.com/microsoft/RD-Agent/commit/00476413fbf00e36e71ab3ccb48d4e766b6ccf4d))
* fix some bugs and add original features' description ([#259](https://github.com/microsoft/RD-Agent/issues/259)) ([1a5f45a](https://github.com/microsoft/RD-Agent/commit/1a5f45a40d821c017bdba14af8c93710707c5ea5))
* get kaggle notebooks & discussion text for RAG ([#371](https://github.com/microsoft/RD-Agent/issues/371)) ([cead345](https://github.com/microsoft/RD-Agent/commit/cead3450a14bf4b142ac988c27fa098c7656a95c))
* Iceberg competition ([#372](https://github.com/microsoft/RD-Agent/issues/372)) ([c10ea4f](https://github.com/microsoft/RD-Agent/commit/c10ea4f5d4cc56a75b47cf23c7084ee189ba1a25))
* implement isolated model feature selection loop ([#370](https://github.com/microsoft/RD-Agent/issues/370)) ([cf1292d](https://github.com/microsoft/RD-Agent/commit/cf1292de1a0153ca14ea64971e73a1c93f7d89e3))
* Initial version of Graph RAG in KAGGLE scenario ([#301](https://github.com/microsoft/RD-Agent/issues/301)) ([fd3c0fd](https://github.com/microsoft/RD-Agent/commit/fd3c0fd26eff7d3be72fa4f2a234e33b9f796627))
* Integrate RAG into the Kaggle scenarios. ([#262](https://github.com/microsoft/RD-Agent/issues/262)) ([be0e48a](https://github.com/microsoft/RD-Agent/commit/be0e48a7dfbee2b5d2947d09115db5db2e5266f1))
* Kaggle loop update (Feature & Model) ([#241](https://github.com/microsoft/RD-Agent/issues/241)) ([4cf22a6](https://github.com/microsoft/RD-Agent/commit/4cf22a65c964123b4267569ee02c0c7094c54ca4))
* kaggle templates related ([#287](https://github.com/microsoft/RD-Agent/issues/287)) ([785fdc1](https://github.com/microsoft/RD-Agent/commit/785fdc144d16fa8454b7c9d2e53e78fe7f22a29a))
* Model context for tuning and selection ([#284](https://github.com/microsoft/RD-Agent/issues/284)) ([f2831e7](https://github.com/microsoft/RD-Agent/commit/f2831e7442510668b0ca75953b3359894803ef3c))
* Modify FactorRowCountEvaluator and FactorIndexEvaluator to return the ratio ([#328](https://github.com/microsoft/RD-Agent/issues/328)) ([8f43f8e](https://github.com/microsoft/RD-Agent/commit/8f43f8e87a92e05b541e925910608606ec8f6c4b))
* New competition - Optiver ([#356](https://github.com/microsoft/RD-Agent/issues/356)) ([3705efe](https://github.com/microsoft/RD-Agent/commit/3705efe3b923748655a57d76b7a236e54d361831))
* random forest for s3e11 ([#347](https://github.com/microsoft/RD-Agent/issues/347)) ([b57846d](https://github.com/microsoft/RD-Agent/commit/b57846d29314e9a5967945d1b4895f0f48c0f5ce))
* refine the code in model description and fix some bugs in feedback.py ([#288](https://github.com/microsoft/RD-Agent/issues/288)) ([5b124d7](https://github.com/microsoft/RD-Agent/commit/5b124d7372137e4c613eb2749ddcc773922cc7b6))
* refine the template in several Kaggle competitions ([#343](https://github.com/microsoft/RD-Agent/issues/343)) ([034f238](https://github.com/microsoft/RD-Agent/commit/034f238ed5ec351486b21250eabc75114961936c))
* Revise to support better hypothesis proposal ([#390](https://github.com/microsoft/RD-Agent/issues/390)) ([c55ec0a](https://github.com/microsoft/RD-Agent/commit/c55ec0a0f577bbf7fc6228f7b87d2089ded83b31))
* show workspace in demo ([#348](https://github.com/microsoft/RD-Agent/issues/348)) ([ddf567c](https://github.com/microsoft/RD-Agent/commit/ddf567c551b553788be022e9312c209ef6137d64))
* support Multi output ([#330](https://github.com/microsoft/RD-Agent/issues/330)) ([3d36c45](https://github.com/microsoft/RD-Agent/commit/3d36c452ff0983800e5343834cc69f24a508ea70))
* Supporting COVID-19 competition ([#374](https://github.com/microsoft/RD-Agent/issues/374)) ([a1b63db](https://github.com/microsoft/RD-Agent/commit/a1b63db79600edc9a74ba713c9d0be290214a592))
* supporting Mnist competition ([#375](https://github.com/microsoft/RD-Agent/issues/375)) ([e958a34](https://github.com/microsoft/RD-Agent/commit/e958a34f5632a46ac43bff8e0d07d6ed020fdfc2))
* Supporting Model Specifications ([#319](https://github.com/microsoft/RD-Agent/issues/319)) ([e126471](https://github.com/microsoft/RD-Agent/commit/e1264719e10b76158a91cd0ef331848e7c2de7c7))
* supporting various Kaggle competitions & scenarios for RD-Agent ([#409](https://github.com/microsoft/RD-Agent/issues/409)) ([75eea22](https://github.com/microsoft/RD-Agent/commit/75eea22cc3d4e6f5a94c88cce915e27c507f8c50))
* template for kaggle ([#308](https://github.com/microsoft/RD-Agent/issues/308)) ([ff97cf0](https://github.com/microsoft/RD-Agent/commit/ff97cf0155ab6941e4b5cf7d103575f934b70dc9))
* use auto gen seed when using LLM cache ([#441](https://github.com/microsoft/RD-Agent/issues/441)) ([ca15365](https://github.com/microsoft/RD-Agent/commit/ca15365d23eeb094f42cf3dc8f5269b2f1c42bd3))
* use unified pickle cacher & move llm config into a isolated config ([#424](https://github.com/microsoft/RD-Agent/issues/424)) ([2879ecf](https://github.com/microsoft/RD-Agent/commit/2879ecff816d97688b60909a79c7e568d42608a1))
* xgboost gpu accelerate ([#359](https://github.com/microsoft/RD-Agent/issues/359)) ([56a5b8f](https://github.com/microsoft/RD-Agent/commit/56a5b8f9b2c6726cc64ec5b04b4ce7935d59b572))
### Bug Fixes
* a bug of developer& edit s4e8 template ([#338](https://github.com/microsoft/RD-Agent/issues/338)) ([f12ce72](https://github.com/microsoft/RD-Agent/commit/f12ce726e7de96d478a232a3c27f92439820f8b4))
* actively raised errors are also considered as negative feedback. ([#268](https://github.com/microsoft/RD-Agent/issues/268)) ([46ec908](https://github.com/microsoft/RD-Agent/commit/46ec908e3594ac5e4cdc4057268e2f8800f5ed1f))
* bug of saving preprocess cache files ([#310](https://github.com/microsoft/RD-Agent/issues/310)) ([5fb0608](https://github.com/microsoft/RD-Agent/commit/5fb0608f39f113cc9807fb1f381284a0bd4da318))
* cache ([#383](https://github.com/microsoft/RD-Agent/issues/383)) ([f2a6e75](https://github.com/microsoft/RD-Agent/commit/f2a6e75b36ca96f7733b9c2a7154ac67bd2d7c6f))
* change css tag of kaggle competition info crawler ([#306](https://github.com/microsoft/RD-Agent/issues/306)) ([1e3d38b](https://github.com/microsoft/RD-Agent/commit/1e3d38bf1ca3654f3a90ff392ecba1dbb4e80224))
* debug dsagent ([#387](https://github.com/microsoft/RD-Agent/issues/387)) ([8fe9511](https://github.com/microsoft/RD-Agent/commit/8fe9511e606ba148c66f384add6ab94857079541))
* eval_method cannot catch run factor error ([#260](https://github.com/microsoft/RD-Agent/issues/260)) ([2aaab31](https://github.com/microsoft/RD-Agent/commit/2aaab317ccb7a0121063bcd85fc36c21c7b8a391))
* fix a bug in competition metric evaluation ([#407](https://github.com/microsoft/RD-Agent/issues/407)) ([94c47d6](https://github.com/microsoft/RD-Agent/commit/94c47d6fd5c3e38fc786a83e6d0d05e8d04498f3))
* fix a bug in mini case ([#389](https://github.com/microsoft/RD-Agent/issues/389)) ([e75bb57](https://github.com/microsoft/RD-Agent/commit/e75bb5746f63933b750406bbd34ee63c5ba76b9f))
* fix a bug in model tuning feedback ([#316](https://github.com/microsoft/RD-Agent/issues/316)) ([8aa088d](https://github.com/microsoft/RD-Agent/commit/8aa088da2dc7525a3970c01d01987246f47d6238))
* fix a bug in scenario.py ([#388](https://github.com/microsoft/RD-Agent/issues/388)) ([999a1eb](https://github.com/microsoft/RD-Agent/commit/999a1eb0eff9088e1b02419db741db4acf8d9ff7))
* fix a bug in the format of the model input ([#327](https://github.com/microsoft/RD-Agent/issues/327)) ([8f0574e](https://github.com/microsoft/RD-Agent/commit/8f0574eaaadb245b8c38e09ad4821306996d926f))
* fix a small bug in cache using module name and function name as unique folder name ([#429](https://github.com/microsoft/RD-Agent/issues/429)) ([4f8134a](https://github.com/microsoft/RD-Agent/commit/4f8134a697d952f7ac824d7ebeec64bbc4545ab3))
* fix a typo ([#362](https://github.com/microsoft/RD-Agent/issues/362)) ([9fafabd](https://github.com/microsoft/RD-Agent/commit/9fafabdf321b818bdd2211a2324d50cd0ebe1c1f))
* fix cache result logic ([#430](https://github.com/microsoft/RD-Agent/issues/430)) ([5e34263](https://github.com/microsoft/RD-Agent/commit/5e342637dcc862679fd0642c6ba9ef048c984845))
* fix command injection ([#421](https://github.com/microsoft/RD-Agent/issues/421)) ([52f30a6](https://github.com/microsoft/RD-Agent/commit/52f30a6184af1295be15e855a80b84bc424fc75d))
* fix json load error ([#386](https://github.com/microsoft/RD-Agent/issues/386)) ([bba55fb](https://github.com/microsoft/RD-Agent/commit/bba55fb48fe105f4847c1b9c476eedc80835f523))
* fix some bugs in feedback.py and refine the prompt ([#292](https://github.com/microsoft/RD-Agent/issues/292)) ([d834052](https://github.com/microsoft/RD-Agent/commit/d8340527f133dcc649d599d90d6402eddd37859e))
* fix some bugs in knowledge base ([#378](https://github.com/microsoft/RD-Agent/issues/378)) ([fa6ff8e](https://github.com/microsoft/RD-Agent/commit/fa6ff8e591cf1847df77d73116649c5623161573))
* fix some bugs in rag ([#399](https://github.com/microsoft/RD-Agent/issues/399)) ([194215c](https://github.com/microsoft/RD-Agent/commit/194215c4559aee5b6ece18d65c95fb30968e2db6))
* fix some bugs in the entire loop ([#274](https://github.com/microsoft/RD-Agent/issues/274)) ([8a564ec](https://github.com/microsoft/RD-Agent/commit/8a564ece1d87b27ee98b76db317935e802468965))
* fix some errors in scenario.py, proposal.py and runner.py and several complex competition scenarios([#365](https://github.com/microsoft/RD-Agent/issues/365)) ([2e383b1](https://github.com/microsoft/RD-Agent/commit/2e383b175d8448a67cb470f4e3ae8977d8ec6b5b))
* improve_execution_time_in_kaggle_loop ([#279](https://github.com/microsoft/RD-Agent/issues/279)) ([4c8f998](https://github.com/microsoft/RD-Agent/commit/4c8f998c76f1e983a5687d2c65d3251750f2a9a0))
* kaggle data mount problem ([#297](https://github.com/microsoft/RD-Agent/issues/297)) ([795df31](https://github.com/microsoft/RD-Agent/commit/795df311e3f93cd2f3fb51ba5698adaf10f6bd62))
* Optiver fixes ([#357](https://github.com/microsoft/RD-Agent/issues/357)) ([b054017](https://github.com/microsoft/RD-Agent/commit/b054017463af0d1784407030f2477d212118f341))
* partial bug in bench ([#368](https://github.com/microsoft/RD-Agent/issues/368)) ([af9808f](https://github.com/microsoft/RD-Agent/commit/af9808f98736a2df07e121c2f6d7bfeb7b7d3581))
* preprocess output format & some mistake in spelling ([#358](https://github.com/microsoft/RD-Agent/issues/358)) ([b8b2cd6](https://github.com/microsoft/RD-Agent/commit/b8b2cd6ccd3b27aa73de847e50899a8a53b71b8f))
* rag save file ([#385](https://github.com/microsoft/RD-Agent/issues/385)) ([1cb01dd](https://github.com/microsoft/RD-Agent/commit/1cb01dd6fe595f2f5fb86487601326611dd1a57a))
* raise error in demo when no Metric in a Loop ([#313](https://github.com/microsoft/RD-Agent/issues/313)) ([e46a78e](https://github.com/microsoft/RD-Agent/commit/e46a78eb69271cb19978aab2f3b976c2870ca082))
* refactor Bench ([#302](https://github.com/microsoft/RD-Agent/issues/302)) ([78a87f6](https://github.com/microsoft/RD-Agent/commit/78a87f624780ff67c0fa995ae4692678a120f99c))
* refine some codes ([#353](https://github.com/microsoft/RD-Agent/issues/353)) ([866c2e6](https://github.com/microsoft/RD-Agent/commit/866c2e63ffa3876a3d16ad37f96da41d0558b714))
* refine the prompt ([#286](https://github.com/microsoft/RD-Agent/issues/286)) ([77966c4](https://github.com/microsoft/RD-Agent/commit/77966c4f5e9f492c437c5b4b78d89c0f875ef0d8))
* refine the ucb algorithm ([#406](https://github.com/microsoft/RD-Agent/issues/406)) ([14f7d97](https://github.com/microsoft/RD-Agent/commit/14f7d976e03c92d6e727524e0cdad8a03b585016))
* revert model and make SOTA model available to COSTEER ([#351](https://github.com/microsoft/RD-Agent/issues/351)) ([3b7437b](https://github.com/microsoft/RD-Agent/commit/3b7437b87e685188259779cd85a78a0b592de9de))
* stop using markup in docker env print ([#336](https://github.com/microsoft/RD-Agent/issues/336)) ([3009889](https://github.com/microsoft/RD-Agent/commit/3009889b5e2605b5427c76f3084e0e58026bb5ae))
* support seed and fix absolute path ([#278](https://github.com/microsoft/RD-Agent/issues/278)) ([26352e1](https://github.com/microsoft/RD-Agent/commit/26352e13121cad5be95c0de78bb9f5dda4330614))
* template for kaggle foreset & s4e9 ([#334](https://github.com/microsoft/RD-Agent/issues/334)) ([2393a41](https://github.com/microsoft/RD-Agent/commit/2393a41e7237615ced2c3fdd5c49308236b9f276))
* test kaggle method ([#296](https://github.com/microsoft/RD-Agent/issues/296)) ([91a6196](https://github.com/microsoft/RD-Agent/commit/91a619618be1d7db660ea2b413a78dfaba9417a1))
* update code to fix a small bug in model cache md5 hash ([#303](https://github.com/microsoft/RD-Agent/issues/303)) ([b00e4dc](https://github.com/microsoft/RD-Agent/commit/b00e4dc2eff5b16029a2a12a6589eadac5cfd148))
* update new feature engineering code format ([#272](https://github.com/microsoft/RD-Agent/issues/272)) ([7850b80](https://github.com/microsoft/RD-Agent/commit/7850b8006a7c89d22629b345b4f361b0f35bc60d))
* Update prompts.yaml to constrain only one model type ([#341](https://github.com/microsoft/RD-Agent/issues/341)) ([5b5dfee](https://github.com/microsoft/RD-Agent/commit/5b5dfeefbc7eb9dcbd9923544005c5d281262c03))
* Update runner.py to fix a small bug ([#282](https://github.com/microsoft/RD-Agent/issues/282)) ([8aef3ab](https://github.com/microsoft/RD-Agent/commit/8aef3abcecd6002bd4bfeedcbe2c786d8bbfe2be))
* Use fixed file name in model costeer & fixing cache ([#311](https://github.com/microsoft/RD-Agent/issues/311)) ([1f910a5](https://github.com/microsoft/RD-Agent/commit/1f910a5248bc576895ed66c2f7b2c3e046a2bc28))
### Performance Improvements
* some small upgrade to factor costeer to improve the performance ([#420](https://github.com/microsoft/RD-Agent/issues/420)) ([9eb931f](https://github.com/microsoft/RD-Agent/commit/9eb931ffd971f252380dbd33ad1db259a4f229fd))
### Reverts
* Revert feat: Factor Implement Search Enhancement ([#294](https://github.com/microsoft/RD-Agent/issues/294)) ([#305](https://github.com/microsoft/RD-Agent/issues/305)) ([f663cf4](https://github.com/microsoft/RD-Agent/commit/f663cf42a2f75cd52aef1c6b18be7c27f0641fed))
## [0.2.1](https://github.com/microsoft/RD-Agent/compare/v0.2.0...v0.2.1) (2024-09-10)
### Bug Fixes
* default model value in config ([#256](https://github.com/microsoft/RD-Agent/issues/256)) ([c097585](https://github.com/microsoft/RD-Agent/commit/c097585f631f401c2c0966f6ad4c17286924f011))
* fix_dotenv_error ([#257](https://github.com/microsoft/RD-Agent/issues/257)) ([923063c](https://github.com/microsoft/RD-Agent/commit/923063c1fd957c4ed42e97272c72b5e9545451dc))
* readme ([#248](https://github.com/microsoft/RD-Agent/issues/248)) ([8cede22](https://github.com/microsoft/RD-Agent/commit/8cede2209922876490148459e1134da828e1fda0))
## [0.2.0](https://github.com/microsoft/RD-Agent/compare/v0.1.0...v0.2.0) (2024-09-07)
### Features
* add collect info ([#233](https://github.com/microsoft/RD-Agent/issues/233)) ([89f4af9](https://github.com/microsoft/RD-Agent/commit/89f4af90fb4d95a0689bf9efc8ffd9326469c0aa))
* add cross validation for kaggle scenario ([#236](https://github.com/microsoft/RD-Agent/issues/236)) ([e0b03ba](https://github.com/microsoft/RD-Agent/commit/e0b03ba6b5c3d9aa552b99d470e106d4e348e64d))
* add progress status for docker env ([#215](https://github.com/microsoft/RD-Agent/issues/215)) ([538d4ef](https://github.com/microsoft/RD-Agent/commit/538d4ef2e52de795b90d3f75b2e1e877ab85c18d))
* Added loop code for Kaggle scene. ([#211](https://github.com/microsoft/RD-Agent/issues/211)) ([975c327](https://github.com/microsoft/RD-Agent/commit/975c32715e51aec6b49537401f5fc59115e04a01))
* Demo display effect and usage ([#162](https://github.com/microsoft/RD-Agent/issues/162)) ([8cf122a](https://github.com/microsoft/RD-Agent/commit/8cf122a0155f434fa4477ae7a6d616b5caecd3e0))
* piloting of the framework ([#227](https://github.com/microsoft/RD-Agent/issues/227)) ([e9b103e](https://github.com/microsoft/RD-Agent/commit/e9b103e684fdd2b98cd1a89971a3fce2d6e884a1))
* support more models for kaggle scenario ([#223](https://github.com/microsoft/RD-Agent/issues/223)) ([e3a9659](https://github.com/microsoft/RD-Agent/commit/e3a96598c0720fe092ec86d7ca8c195c7d6bcc72))
* update model_experiment.py to support basic EDA ([#220](https://github.com/microsoft/RD-Agent/issues/220)) ([bf2684c](https://github.com/microsoft/RD-Agent/commit/bf2684c4d55ab8e1048ac0291695475ad53b0cd6))
### Bug Fixes
* fix some bugs in llm calling ([#217](https://github.com/microsoft/RD-Agent/issues/217)) ([7b010f8](https://github.com/microsoft/RD-Agent/commit/7b010f8b5940aba65a58f1d78192aa80bcd0e654))
* package dependency. ([#234](https://github.com/microsoft/RD-Agent/issues/234)) ([46be295](https://github.com/microsoft/RD-Agent/commit/46be2952952af534fd8d98a656c704c688d7cbdd))
* remove useless line ([#177](https://github.com/microsoft/RD-Agent/issues/177)) ([64e9a8e](https://github.com/microsoft/RD-Agent/commit/64e9a8e39a2072a962111db18f5b9565df5b0176))
## [0.1.0](https://github.com/microsoft/RD-Agent/compare/v0.0.1...v0.1.0) (2024-08-09)
### Features
* add entry for rdagent. ([#187](https://github.com/microsoft/RD-Agent/issues/187)) ([121b6d9](https://github.com/microsoft/RD-Agent/commit/121b6d98de38cd03be30cbee47b40baf39a2b60b))
* change ui entry ([#197](https://github.com/microsoft/RD-Agent/issues/197)) ([fa5d335](https://github.com/microsoft/RD-Agent/commit/fa5d3354d22240888f4fc4007d9834f7424632aa))
* remove pdfs and enable online pdf readings ([#183](https://github.com/microsoft/RD-Agent/issues/183)) ([18c0501](https://github.com/microsoft/RD-Agent/commit/18c05016a23d694c7b12759cf1322562dcffc56a))
### Bug Fixes
* Fix a fail href in readme ([#189](https://github.com/microsoft/RD-Agent/issues/189)) ([1b89218](https://github.com/microsoft/RD-Agent/commit/1b89218f6bc697494f4a1b8a42ad18963002714f))
* fix quick start problem ([#191](https://github.com/microsoft/RD-Agent/issues/191)) ([44f61bf](https://github.com/microsoft/RD-Agent/commit/44f61bfa1058a8efb59ca48b7f1417765aeea33e))
* update command line in readme.md ([#192](https://github.com/microsoft/RD-Agent/issues/192)) ([9c45d24](https://github.com/microsoft/RD-Agent/commit/9c45d24a192da02f7d9765cb001097da1bc36c61))
## 0.0.1 (2024-08-08)
### Features
* Add description for scenario experiments. ([#174](https://github.com/microsoft/RD-Agent/issues/174)) ([fbd8c6d](https://github.com/microsoft/RD-Agent/commit/fbd8c6d87e1424c08997103b8e8fbf264858c4ed))
* Added QlibFactorFromReportScenario and improved the report-factor loop. ([#161](https://github.com/microsoft/RD-Agent/issues/161)) ([882c79b](https://github.com/microsoft/RD-Agent/commit/882c79bf11583980e646b130f71cfa20201ffc7b))
* filter feature which is high correlation to former implemented features ([#145](https://github.com/microsoft/RD-Agent/issues/145)) ([e818326](https://github.com/microsoft/RD-Agent/commit/e818326422740e04a4863f7c3c18744dde2ad98f))
* Remove redundant 'key steps' section in frontend scene display. ([#169](https://github.com/microsoft/RD-Agent/issues/169)) ([e767005](https://github.com/microsoft/RD-Agent/commit/e76700513bee29232c93b97414419df330d9be8d))
* streamlit webapp demo for different scenarios ([#135](https://github.com/microsoft/RD-Agent/issues/135)) ([d8da7db](https://github.com/microsoft/RD-Agent/commit/d8da7db865e6653fc4740efee9a843b69bd79699))
* Uploaded Documentation, Updated Prompts & Some Code for model demo ([#144](https://github.com/microsoft/RD-Agent/issues/144)) ([529f935](https://github.com/microsoft/RD-Agent/commit/529f935aa98623f0dc1dda29eecee3ef738dd446))
### Bug Fixes
* Add framework handling for task coding failure. ([#176](https://github.com/microsoft/RD-Agent/issues/176)) ([5e14fa5](https://github.com/microsoft/RD-Agent/commit/5e14fa54a9dd30a94aebe2643b8c9a3b85517a11))
* Comprehensive update to factor extraction. ([#143](https://github.com/microsoft/RD-Agent/issues/143)) ([b5ea040](https://github.com/microsoft/RD-Agent/commit/b5ea04019fd5fa15c0f8b9a7e4f18f490f7057d4))
* first round app folder cleaning ([#166](https://github.com/microsoft/RD-Agent/issues/166)) ([6a5a750](https://github.com/microsoft/RD-Agent/commit/6a5a75021912927deb5e8e4c7ad3ec4b51bfc788))
* fix pickle problem ([#140](https://github.com/microsoft/RD-Agent/issues/140)) ([7ee4258](https://github.com/microsoft/RD-Agent/commit/7ee42587b60d94417f34332cee395cf210dc8a0e))
* fix release CI ([#165](https://github.com/microsoft/RD-Agent/issues/165)) ([85d6a5e](https://github.com/microsoft/RD-Agent/commit/85d6a5ed91113fda34ae079b23c89aa24acd2cb2))
* fix release CI error ([#160](https://github.com/microsoft/RD-Agent/issues/160)) ([1c9f8ef](https://github.com/microsoft/RD-Agent/commit/1c9f8ef287961731944acc9008496b4dddeddca7))
* fix several bugs in data mining scenario ([#147](https://github.com/microsoft/RD-Agent/issues/147)) ([b233380](https://github.com/microsoft/RD-Agent/commit/b233380e2c66fb030db39424f0f040c86e37f5c4))
* fix some small bugs in report-factor loop ([#152](https://github.com/microsoft/RD-Agent/issues/152)) ([a79f9f9](https://github.com/microsoft/RD-Agent/commit/a79f9f93406aff6305a76e6a6abd3852642e4c62))
* fix_release_ci_error ([#150](https://github.com/microsoft/RD-Agent/issues/150)) ([4f82e99](https://github.com/microsoft/RD-Agent/commit/4f82e9960a2638af9d831581185ddd3bac5711fc))
* Fixed some bugs introduced during refactoring. ([#167](https://github.com/microsoft/RD-Agent/issues/167)) ([f8f1445](https://github.com/microsoft/RD-Agent/commit/f8f1445283fb89aefeb2918243c35a219a51a56c))
* optimize some prompts in factor loop. ([#158](https://github.com/microsoft/RD-Agent/issues/158)) ([c2c1330](https://github.com/microsoft/RD-Agent/commit/c2c13300b9ad315a663ec2d0eada414e56c6f54f))
### Miscellaneous Chores
* release 0.0.1 ([1feacd3](https://github.com/microsoft/RD-Agent/commit/1feacd39b21193de11e9bbecf880ddf96d7c261c))
================================================
FILE: CODE_OF_CONDUCT.md
================================================
# Microsoft Open Source Code of Conduct
This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
Resources:
- [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/)
- [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
- Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns
================================================
FILE: CONTRIBUTING.md
================================================
# Contributing to RD-Agent
We welcome contributions and suggestions to improve RD-Agent. Whether it's solving an issue, addressing a bug, enhancing documentation, or even correcting a typo, every contribution is valuable and helps improve the project.
## Getting Started
To get started, you can explore the issues list or search for `TODO:` comments in the codebase by running the command:
```sh
grep -r "TODO:"
```
## How to Contribute
1. **Fork the Repository**: Create a fork of the repository on GitHub.
2. **Clone the Repository**: Clone your forked repository to your local machine.
```sh
git clone https://github.com/your-username/RD-Agent.git
```
3. **Create a Branch**: Create a new branch for your changes.
```sh
git checkout -b feature/your-feature-name
```
4. **Make Changes**: Make your changes to the codebase.
5. **Commit Changes**: Commit your changes with a descriptive commit message.
```sh
git commit -m "Description of your changes"
```
6. **Push Changes**: Push your changes to your forked repository.
```sh
git push origin feature/your-feature-name
```
7. **Ensure CI Passes**: Make sure your code passes the automatic CI checks on GitHub.
8. **Create a Pull Request**: Create a pull request from your forked repository to the main repository.
## Code of Conduct
Please adhere to the [Code of Conduct](CODE_OF_CONDUCT.md) in all your interactions with the project.
## Reporting Issues
If you encounter any issues or have suggestions for improvements, please open an issue on GitHub.
## Guidelines
- Ensure your code follows the project's coding standards.
- Write clear and concise commit messages.
- Update documentation as needed.
- Test your changes thoroughly before submitting a pull request.
Thank you for contributing to RD-Agent!
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) Microsoft Corporation.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: Makefile
================================================
# Declare every command-style target as phony so that make always runs its recipe
# instead of treating a same-named file or directory (e.g. `docs`, `test`,
# `constraints`) as an up-to-date build product.
.PHONY: clean deepclean install init-qlib-env dev constraints black isort mypy ruff toml-sort lint pre-commit auto-black auto-isort auto-toml-sort auto-lint test-run test-run-offline test test-offline build upload docs-autobuild changelog release-notes docs-gen docs-mypy docs-coverage docs
# You can change this to match the shell used by your terminal.
SHELL := /bin/bash
########################################################################################
# Variables
########################################################################################
# Determine whether to invoke pipenv based on CI environment variable and the availability of pipenv.
PIPRUN := $(shell [ "$$CI" != "true" ] && command -v pipenv > /dev/null 2>&1 && echo "pipenv run")
# Get the Python version in `major.minor` format, using the environment variable or the virtual environment if exists.
PYTHON_VERSION := $(shell echo $${PYTHON_VERSION:-$$(python -V 2>&1 | cut -d ' ' -f 2)} | cut -d '.' -f 1,2)
# Determine the constraints file based on the Python version.
CONSTRAINTS_FILE := constraints/$(PYTHON_VERSION).txt
# Documentation target directory, will be adapted to specific folder for readthedocs.
PUBLIC_DIR := $(shell [ "$$READTHEDOCS" = "True" ] && echo "$$READTHEDOCS_OUTPUT/html" || echo "public")
# URL and Path of changelog source code.
CHANGELOG_URL := $(shell echo $${CI_PAGES_URL:-https://microsoft.github.io/rdagent}/_sources/changelog.md.txt)
CHANGELOG_PATH := docs/changelog.md
########################################################################################
# Development Environment Management
########################################################################################
# Remove common intermediate files.
clean:
-rm -rf \
$(PUBLIC_DIR) \
.coverage \
.mypy_cache \
.pytest_cache \
.ruff_cache \
Pipfile* \
coverage.xml \
dist \
release-notes.md
find . -name '*.egg-info' -print0 | xargs -0 rm -rf
find . -name '*.pyc' -print0 | xargs -0 rm -f
find . -name '*.swp' -print0 | xargs -0 rm -f
find . -name '.DS_Store' -print0 | xargs -0 rm -f
find . -name '__pycache__' -print0 | xargs -0 rm -rf
# Remove pre-commit hook and virtual environment alongside intermediate files.
deepclean: clean
if command -v pre-commit > /dev/null 2>&1; then pre-commit uninstall --hook-type pre-push; fi
if command -v pipenv >/dev/null 2>&1 && pipenv --venv >/dev/null 2>&1; then pipenv --rm; fi
# Install the package in editable mode.
install:
$(PIPRUN) pip install -e . -c $(CONSTRAINTS_FILE)
# Install the package in editable mode with specific optional dependencies.
dev-%:
$(PIPRUN) pip install -e .[$*] -c $(CONSTRAINTS_FILE)
# Prepare the development environment.
# Build submodules.
# Install the package in editable mode with all optional dependencies and pre-commit hook.
init-qlib-env:
# note: You may need to install torch manually
# todo: downgrade ruamel.yaml in pyqlib
conda create -n qlibRDAgent python=3.8 -y
@source $$(conda info --base)/etc/profile.d/conda.sh && conda activate qlibRDAgent && which pip && pip install pyqlib && pip install ruamel-yaml==0.17.21 && pip install torch==2.1.1 && pip install catboost==0.24.3 && conda deactivate
dev:
$(PIPRUN) pip install -U pip setuptools wheel
$(PIPRUN) pip install -e .[docs,lint,package,test] -c $(CONSTRAINTS_FILE)
$(PIPRUN) pip install -U kaggle
if [ "$(CI)" != "true" ] && command -v pre-commit > /dev/null 2>&1; then pre-commit install --hook-type pre-push; fi
# Generate constraints for current Python version.
constraints: deepclean
$(PIPRUN) --python $(PYTHON_VERSION) pip install --upgrade -e .[docs,lint,package,test]
$(PIPRUN) pip freeze --exclude-editable > $(CONSTRAINTS_FILE)
########################################################################################
# Lint and pre-commit
########################################################################################
# Check lint with black.
black:
$(PIPRUN) python -m black --check --diff . --extend-exclude "(test/scripts|test/notebook/testfiles|git_ignore_folder|web)" -l 120
# Check lint with isort.
isort:
$(PIPRUN) python -m isort --check . -s git_ignore_folder -s test/scripts -s test/notebook/testfiles -s web
# Check lint with mypy.
# First deal with the core folder, and then gradually increase the scope of detection,
# and eventually realize the detection of the complete project.
mypy:
$(PIPRUN) python -m mypy rdagent/core
# Check lint with ruff.
# First deal with the core folder, and then gradually increase the scope of detection,
# and eventually realize the detection of the complete project.
ruff:
$(PIPRUN) ruff check rdagent/core --ignore FBT001,FBT002,I001,E501 # --exclude rdagent/scripts,git_ignore_folder
# Check lint with toml-sort.
toml-sort:
$(PIPRUN) toml-sort --check pyproject.toml
# Check lint with all linters.
# Prioritize fixing isort, then black, otherwise you'll get weird and unfixable black errors.
# lint: mypy ruff
lint: mypy ruff isort black toml-sort
# Run pre-commit with autofix against all files.
pre-commit:
pre-commit run --all-files
########################################################################################
# Auto Lint
########################################################################################
# Auto lint with black.
auto-black:
$(PIPRUN) python -m black . --extend-exclude "(test/scripts|test/notebook/testfiles|git_ignore_folder|.venv|web)" -l 120
# Auto lint with isort.
auto-isort:
$(PIPRUN) python -m isort . -s git_ignore_folder -s test/scripts -s test/notebook/testfiles -s .venv -s web
# Auto lint with toml-sort.
auto-toml-sort:
$(PIPRUN) toml-sort pyproject.toml
# Auto lint with all linters.
auto-lint: auto-isort auto-black auto-toml-sort
########################################################################################
# Test
########################################################################################
# Clean and run test with coverage.
test-run:
$(PIPRUN) python -m coverage erase
$(PIPRUN) python -m coverage run --concurrency=multiprocessing -m pytest --ignore test/scripts
$(PIPRUN) python -m coverage combine
test-run-offline:
# some test that does not require api calling
$(PIPRUN) python -m coverage erase
$(PIPRUN) python -m coverage run --concurrency=multiprocessing -m pytest -m "offline" --ignore test/scripts
$(PIPRUN) python -m coverage combine
# Generate coverage report for terminal and xml.
# TODO: raise the coverage threshold (target: 80) once more tests have been added.
test: test-run
$(PIPRUN) python -m coverage report --fail-under 20 # 80
$(PIPRUN) python -m coverage xml --fail-under 20 # 80
test-offline: test-run-offline
$(PIPRUN) python -m coverage report --fail-under 20 # 80
$(PIPRUN) python -m coverage xml --fail-under 20 # 80
########################################################################################
# Package
########################################################################################
# Build the package.
build:
$(PIPRUN) python -m build
# Upload the package.
upload:
$(PIPRUN) python -m twine upload dist/*
########################################################################################
# Documentation
########################################################################################
# Generate documentation with auto build when changes happen.
docs-autobuild:
$(PIPRUN) python -m sphinx_autobuild docs $(PUBLIC_DIR) \
--watch README.md \
--watch rdagent
# Generate changelog from git commits.
# The -c and -s arguments should match
# If -c uses Basic (default, inherits from base class), -s optional argument: add,fix,change,remove,merge,doc
# If -c uses conventional (inherits from base class), -s is optional: build,chore,ci,deps,doc,docs,feat,fix,perf,ref,refactor,revert,style,test,tests
# If -c uses angular (inherits from conventional), -s optional argument: build,chore,ci,deps,doc,docs,feat,fix,perf,ref,refactor,revert,style,test,tests
# NOTE(xuan.hu): Need to be run before document generation to take effect.
# $(PIPRUN) git-changelog -ETrio $(CHANGELOG_PATH) -c conventional -s build,chore,ci,docs,feat,fix,perf,refactor,revert,style,test
# Recipe outline:
#   1. If a published changelog is reachable at $(CHANGELOG_URL), download it to
#      $(CHANGELOG_PATH) so that generation is incremental.
#   2. Resolve the most recently created git tag and let git-changelog bump from it,
#      updating $(CHANGELOG_PATH) in place.
# $(PIPRUN) is placed directly in front of git-changelog rather than in front of the
# variable assignment: when it expands to `pipenv run`, prefixing the assignment would
# make pipenv try to execute `LATEST_TAG=...` as a command and leave git-changelog
# running outside the environment.
changelog:
	@if wget -q --spider $(CHANGELOG_URL); then \
		echo "Existing Changelog found at '$(CHANGELOG_URL)', download for incremental generation."; \
		wget -q -O $(CHANGELOG_PATH) $(CHANGELOG_URL); \
	fi
	LATEST_TAG=$$(git tag --sort=-creatordate | head -n 1); \
	$(PIPRUN) git-changelog --bump $$LATEST_TAG -Tio $(CHANGELOG_PATH) -c conventional -s build,chore,ci,deps,doc,docs,feat,fix,perf,ref,refactor,revert,style,test,tests
# Generate release notes from changelog.
# Runs git-changelog in --release-notes mode with $(CHANGELOG_PATH) as input.
# The leading `@` stops make from echoing the command, so only the tool's own
# output is printed.
release-notes:
@$(PIPRUN) git-changelog --input $(CHANGELOG_PATH) --release-notes
# Build documentation only from rdagent.
# Invokes the Sphinx builder with `docs` as the source directory and
# $(PUBLIC_DIR) as the output directory. -W makes Sphinx treat warnings as
# errors, so any documentation warning fails this target.
docs-gen:
$(PIPRUN) python -m sphinx.cmd.build -W docs $(PUBLIC_DIR)
# Generate mypy reports.
# Depends on docs-gen. Type-checks the `rdagent` and `test` trees, excluding
# git_ignore_folder and rdagent/scripts, and writes an HTML report to
# $(PUBLIC_DIR)/reports/mypy.
docs-mypy: docs-gen
$(PIPRUN) python -m mypy rdagent test --exclude git_ignore_folder --exclude rdagent/scripts --html-report $(PUBLIC_DIR)/reports/mypy
# Generate html coverage reports with badge.
# Depends on test-run and docs-gen. Writes the HTML coverage report to
# $(PUBLIC_DIR)/reports/coverage, then runs scripts/generate-coverage-badge.sh
# with $(PUBLIC_DIR)/_static/badges as its argument.
# NOTE(review): --fail-under is 80 here, whereas the `coverage report` and
# `coverage xml` commands earlier in this Makefile use 20 (annotated "# 80");
# confirm whether this stricter threshold is intended.
docs-coverage: test-run docs-gen
$(PIPRUN) python -m coverage html -d $(PUBLIC_DIR)/reports/coverage --fail-under 80
$(PIPRUN) bash scripts/generate-coverage-badge.sh $(PUBLIC_DIR)/_static/badges
# Generate all documentation with reports.
# Aggregate target with no recipe of its own: it only pulls in the changelog,
# docs-gen, docs-mypy and docs-coverage targets as prerequisites.
docs: changelog docs-gen docs-mypy docs-coverage
########################################################################################
# End
########################################################################################
================================================
FILE: README.md
================================================
<h4 align="center">
<img src="docs/_static/logo.png" alt="RD-Agent logo" style="width:70%; ">
<a href="https://rdagent.azurewebsites.net" target="_blank">🖥️ Live Demo</a> |
<a href="https://rdagent.azurewebsites.net/factor_loop" target="_blank">🎥 Demo Video</a> <a href="https://www.youtube.com/watch?v=JJ4JYO3HscM&list=PLALmKB0_N3_i52fhUmPQiL4jsO354uopR" target="_blank">▶️YouTube</a> |
<a href="https://rdagent.readthedocs.io/en/latest/index.html" target="_blank">📖 Documentation</a> |
<a href="https://aka.ms/RD-Agent-Tech-Report" target="_blank">📄 Tech Report</a> |
<a href="#-paperwork-list"> 📃 Papers </a>
</h4>
[](https://github.com/microsoft/RD-Agent/actions/workflows/ci.yml)
[](https://github.com/microsoft/RD-Agent/actions/workflows/github-code-scanning/codeql)
[](https://github.com/microsoft/RD-Agent/actions/workflows/dependabot/dependabot-updates)
[](https://github.com/microsoft/RD-Agent/actions/workflows/pr.yml)
[](https://github.com/microsoft/RD-Agent/actions/workflows/release.yml)
[](https://pypi.org/project/rdagent/#files)
[](https://pypi.org/project/rdagent/)
[](https://pypi.org/project/rdagent/)
[](https://github.com/microsoft/RD-Agent/releases)
[](https://github.com/microsoft/RD-Agent/blob/main/LICENSE)
[](https://github.com/pre-commit/pre-commit)
[](http://mypy-lang.org/)
[](https://github.com/astral-sh/ruff)
[](https://discord.gg/ybQ97B6Jjy)
[](https://rdagent.readthedocs.io/en/latest/?badge=latest)
[](https://github.com/microsoft/RD-Agent/actions/workflows/readthedocs-preview.yml) <!-- this badge is too long, please place it in the last one to make it pretty -->
[](https://arxiv.org/abs/2505.14738)
# 📰 News
| 🗞️ News | 📝 Description |
| -- | ------ |
| Web UI Release | We release a new frontend that can be built and served by `rdagent server_ui` for real-time interaction and trace viewing, currently excluding the `data_science` scenario. |
| NeurIPS 2025 Acceptance | We are thrilled to announce that our paper [R&D-Agent-Quant](https://arxiv.org/abs/2505.15155) has been accepted to NeurIPS 2025 |
| [Technical Report Release](#overall-technical-report) | Overall framework description and results on MLE-bench |
| [R&D-Agent-Quant Release](#deep-application-in-diverse-scenarios) | Apply R&D-Agent to quant trading |
| MLE-Bench Results Released | R&D-Agent currently leads as the [top-performing machine learning engineering agent](#-the-best-machine-learning-engineering-agent) on MLE-bench |
| Support LiteLLM Backend | We now fully support **[LiteLLM](https://github.com/BerriAI/litellm)** as our default backend for integration with multiple LLM providers. |
| General Data Science Agent | [Data Science Agent](https://rdagent.readthedocs.io/en/latest/scens/data_science.html) |
| Kaggle Scenario release | We release **[Kaggle Agent](https://rdagent.readthedocs.io/en/latest/scens/data_science.html)**, try the new features! |
| Official WeChat group release | We created a WeChat group, welcome to join! (🗪[QR Code](https://github.com/microsoft/RD-Agent/issues/880)) |
| Official Discord release | We launch our first chatting channel in Discord (🗪[](https://discord.gg/ybQ97B6Jjy)) |
| First release | **R&D-Agent** is released on GitHub |
# 🏆 The Best Machine Learning Engineering Agent!
[MLE-bench](https://github.com/openai/mle-bench) is a comprehensive benchmark evaluating the performance of AI agents on machine learning engineering tasks. Utilizing datasets from 75 Kaggle competitions, MLE-bench provides robust assessments of AI systems' capabilities in real-world ML engineering scenarios.
R&D-Agent currently leads as the top-performing machine learning engineering agent on MLE-bench:
| Agent | Low == Lite (%) | Medium (%) | High (%) | All (%) |
|---------|--------|-----------|---------|----------|
| R&D-Agent o3(R)+GPT-4.1(D) | 51.52 ± 6.9 | 19.3 ± 5.5 | 26.67 ± 0 | 30.22 ± 1.5 |
| R&D-Agent o1-preview | 48.18 ± 2.49 | 8.95 ± 2.36 | 18.67 ± 2.98 | 22.4 ± 1.1 |
| AIDE o1-preview | 34.3 ± 2.4 | 8.8 ± 1.1 | 10.0 ± 1.9 | 16.9 ± 1.1 |
**Notes:**
- **O3(R)+GPT-4.1(D)**: This version is designed to both reduce average time per loop and leverage a cost-effective combination of backend LLMs by seamlessly integrating Research Agent (o3) with Development Agent (GPT-4.1).
- **AIDE o1-preview**: Represents the previously best public result on MLE-bench as reported in the original MLE-bench paper.
- The average and standard deviation results for R&D-Agent o1-preview are based on 5 independent seeds, and those for R&D-Agent o3(R)+GPT-4.1(D) are based on 6 seeds.
- According to MLE-Bench, the 75 competitions are categorized into three levels of complexity: **Low==Lite** if we estimate that an experienced ML engineer can produce a sensible solution in under 2 hours, excluding the time taken to train any models; **Medium** if it takes between 2 and 10 hours; and **High** if it takes more than 10 hours.
You can inspect the detailed runs of the above results online.
- [R&D-Agent o1-preview detailed runs](https://aka.ms/RD-Agent_MLE-Bench_O1-preview)
- [R&D-Agent o3(R)+GPT-4.1(D) detailed runs](https://aka.ms/RD-Agent_MLE-Bench_O3_GPT41)
For running R&D-Agent on MLE-bench, refer to **[MLE-bench Guide: Running ML Engineering via MLE-bench](https://rdagent.readthedocs.io/en/latest/scens/data_science.html)**
# 🥇 The First Data-Centric Quant Multi-Agent Framework!
R&D-Agent for Quantitative Finance, in short **RD-Agent(Q)**, is the first data-centric, multi-agent framework designed to automate the full-stack research and development of quantitative strategies via coordinated factor-model co-optimization.

Extensive experiments in real stock markets show that, at a cost under $10, RD-Agent(Q) achieves approximately 2× higher ARR than benchmark factor libraries while using over 70% fewer factors. It also surpasses state-of-the-art deep time-series models under smaller resource budgets. Its alternating factor–model optimization further delivers an excellent trade-off between predictive accuracy and strategy robustness.
You can learn more details about **RD-Agent(Q)** through the [paper](https://arxiv.org/abs/2505.15155) and reproduce it through the [documentation](https://rdagent.readthedocs.io/en/latest/scens/quant_agent_fin.html).
# Data Science Agent Preview
Check out our demo video showcasing the current progress of our Data Science Agent under development:
https://github.com/user-attachments/assets/3eccbecb-34a4-4c81-bce4-d3f8862f7305
# 🌟 Introduction
<div align="center">
<img src="docs/_static/scen.png" alt="Our focused scenario" style="width:80%; ">
</div>
R&D-Agent aims to automate the most critical and valuable aspects of the industrial R&D process, and we begin by focusing on data-driven scenarios to streamline the development of models and data.
Methodologically, we have identified a framework with two key components: 'R' for proposing new ideas and 'D' for implementing them.
We believe that the automatic evolution of R&D will lead to solutions of significant industrial value.
<!-- Tag Cloud -->
R&D is a very general scenario. The advent of R&D-Agent can be your
- 💰 **Automatic Quant Factory** ([🎥Demo Video](https://rdagent.azurewebsites.net/factor_loop)|[▶️YouTube](https://www.youtube.com/watch?v=X4DK2QZKaKY&t=6s))
- 🤖 **Data Mining Agent:** Iteratively proposing data & models ([🎥Demo Video 1](https://rdagent.azurewebsites.net/model_loop)|[▶️YouTube](https://www.youtube.com/watch?v=dm0dWL49Bc0&t=104s)) ([🎥Demo Video 2](https://rdagent.azurewebsites.net/dmm)|[▶️YouTube](https://www.youtube.com/watch?v=VIaSTZuoZg4)) and implementing them by gaining knowledge from data.
- 🦾 **Research Copilot:** Auto read research papers ([🎥Demo Video](https://rdagent.azurewebsites.net/report_model)|[▶️YouTube](https://www.youtube.com/watch?v=BiA2SfdKQ7o)) / financial reports ([🎥Demo Video](https://rdagent.azurewebsites.net/report_factor)|[▶️YouTube](https://www.youtube.com/watch?v=ECLTXVcSx-c)) and implement model structures or build datasets.
- 🤖 **Kaggle Agent:** Auto Model Tuning and Feature Engineering([🎥Demo Video Coming Soon...]()) and implementing them to achieve more in competitions.
- ...
You can click the links above to view the demo. We're continuously adding more methods and scenarios to the project to enhance your R&D processes and boost productivity.
Additionally, you can take a closer look at the examples in our **[🖥️ Live Demo](https://rdagent.azurewebsites.net/)**.
<div align="center">
<a href="https://rdagent.azurewebsites.net/" target="_blank">
<img src="docs/_static/demo.png" alt="Watch the demo" width="80%">
</a>
</div>
# ⚡ Quick start
### RD-Agent currently only supports Linux.
You can try the above demos by running the following commands:
### 🐳 Docker installation.
Users must ensure Docker is installed before attempting most scenarios. Please refer to the [official 🐳Docker page](https://docs.docker.com/engine/install/) for installation instructions.
Ensure the current user can run Docker commands **without using sudo**. You can verify this by executing `docker run hello-world`.
### 🐍 Create a Conda Environment
- Create a new conda environment with Python (3.10 and 3.11 are well-tested in our CI):
```sh
conda create -n rdagent python=3.10
```
- Activate the environment:
```sh
conda activate rdagent
```
### 🛠️ Install the R&D-Agent
#### For Users
- You can directly install the R&D-Agent package from PyPI:
```sh
pip install rdagent
```
#### For Developers
- If you want to try the latest version or contribute to RD-Agent, you can install it from the source and follow the development setup:
```sh
git clone https://github.com/microsoft/RD-Agent
cd RD-Agent
make dev
```
More details can be found in the [development setup](https://rdagent.readthedocs.io/en/latest/development.html).
### 💊 Health check
- rdagent provides a health check that currently checks two things.
- whether the docker installation was successful.
- whether the default port used by the [rdagent ui](https://github.com/microsoft/RD-Agent?tab=readme-ov-file#%EF%B8%8F-monitor-the-application-results) is occupied.
```sh
rdagent health_check --no-check-env
```
### ⚙️ Configuration
- The demos require the following abilities:
- ChatCompletion
- json_mode
- embedding query
You can set your Chat Model and Embedding Model in the following ways:
> **🔥 Attention**: We now provide experimental support for **DeepSeek** models! You can use DeepSeek's official API for cost-effective and high-performance inference. See the configuration example below for DeepSeek setup.
- **Using LiteLLM (Default)**: We now support LiteLLM as a backend for integration with multiple LLM providers. You can configure in multiple ways:
**Option 1: Unified API base for both models**
*Configuration Example: `OpenAI` Setup :*
```bash
cat << EOF > .env
# Set to any model supported by LiteLLM.
CHAT_MODEL=gpt-4o
EMBEDDING_MODEL=text-embedding-3-small
# Configure unified API base
OPENAI_API_BASE=<your_unified_api_base>
OPENAI_API_KEY=<replace_with_your_openai_api_key>
EOF
```
*Configuration Example: `Azure OpenAI` Setup :*
> Before using this configuration, please confirm in advance that your `Azure OpenAI API key` supports `embedding models`.
```bash
cat << EOF > .env
EMBEDDING_MODEL=azure/<Model deployment supporting embedding>
CHAT_MODEL=azure/<your deployment name>
AZURE_API_KEY=<replace_with_your_openai_api_key>
AZURE_API_BASE=<your_unified_api_base>
AZURE_API_VERSION=<azure api version>
EOF
```
**Option 2: Separate API bases for Chat and Embedding models**
```bash
cat << EOF > .env
# Set to any model supported by LiteLLM.
# Configure separate API bases for chat and embedding
# CHAT MODEL:
CHAT_MODEL=gpt-4o
OPENAI_API_BASE=<your_chat_api_base>
OPENAI_API_KEY=<replace_with_your_openai_api_key>
# EMBEDDING MODEL:
# TAKE siliconflow as an example, you can use other providers.
# Note: embedding requires litellm_proxy prefix
EMBEDDING_MODEL=litellm_proxy/BAAI/bge-large-en-v1.5
LITELLM_PROXY_API_KEY=<replace_with_your_siliconflow_api_key>
LITELLM_PROXY_API_BASE=https://api.siliconflow.cn/v1
EOF
```
*Configuration Example: `DeepSeek` Setup :*
> Since many users encounter configuration errors when setting up DeepSeek, here is a complete working example for the DeepSeek setup:
```bash
cat << EOF > .env
# CHAT MODEL: Using DeepSeek Official API
CHAT_MODEL=deepseek/deepseek-chat
DEEPSEEK_API_KEY=<replace_with_your_deepseek_api_key>
# EMBEDDING MODEL: Using SiliconFlow for embedding since deepseek has no embedding model.
# Note: embedding requires litellm_proxy prefix
EMBEDDING_MODEL=litellm_proxy/BAAI/bge-m3
LITELLM_PROXY_API_KEY=<replace_with_your_siliconflow_api_key>
LITELLM_PROXY_API_BASE=https://api.siliconflow.cn/v1
EOF
```
Notice: If you are using reasoning models that include thought processes in their responses (such as \<think> tags), you need to set the following environment variable:
```bash
REASONING_THINK_RM=True
```
You can also use a deprecated backend if you only use `OpenAI API` or `Azure OpenAI` directly. For this deprecated setting and more configuration information, please refer to the [documentation](https://rdagent.readthedocs.io/en/latest/installation_and_configuration.html).
- If your environment configuration is complete, please execute the following commands to check if your configuration is valid. This step is necessary.
```bash
rdagent health_check
```
### 🚀 Run the Application
The **[🖥️ Live Demo](https://rdagent.azurewebsites.net/)** is implemented by the following commands (each item represents one demo; you can select the one you prefer):
- Run the **Automated Quantitative Trading & Iterative Factors Model Joint Evolution**: [Qlib](http://github.com/microsoft/qlib) self-loop factor & model proposal and implementation application
```sh
rdagent fin_quant
```
- Run the **Automated Quantitative Trading & Iterative Factors Evolution**: [Qlib](http://github.com/microsoft/qlib) self-loop factor proposal and implementation application
```sh
rdagent fin_factor
```
- Run the **Automated Quantitative Trading & Iterative Model Evolution**: [Qlib](http://github.com/microsoft/qlib) self-loop model proposal and implementation application
```sh
rdagent fin_model
```
- Run the **Automated Quantitative Trading & Factors Extraction from Financial Reports**: Run the [Qlib](http://github.com/microsoft/qlib) factor extraction and implementation application based on financial reports
```sh
# 1. Generally, you can run this scenario using the following command:
rdagent fin_factor_report --report-folder=<Your financial reports folder path>
# 2. Specifically, you need to prepare some financial reports first. You can follow this concrete example:
wget https://github.com/SunsetWolf/rdagent_resource/releases/download/reports/all_reports.zip
unzip all_reports.zip -d git_ignore_folder/reports
rdagent fin_factor_report --report-folder=git_ignore_folder/reports
```
- Run the **Automated Model Research & Development Copilot**: model extraction and implementation application
```sh
# 1. Generally, you can run your own papers/reports with the following command:
rdagent general_model <Your paper URL>
# 2. Specifically, you can do it like this. For more details and additional paper examples, use `rdagent general_model -h`:
rdagent general_model "https://arxiv.org/pdf/2210.09789"
```
- Run the **Automated Medical Prediction Model Evolution**: Medical self-loop model proposal and implementation application
```bash
# Generally, you can run the data science program with the following command:
rdagent data_science --competition <your competition name>
# Specifically, you need to create a folder for storing competition files (e.g., the competition description file, competition datasets, etc.) and configure the path to that folder in your environment. In addition, chromedriver is required when you download the competition descriptions. You can follow this specific example:
# 1. Download the dataset, extract it to the target folder.
wget https://github.com/SunsetWolf/rdagent_resource/releases/download/ds_data/arf-12-hours-prediction-task.zip
unzip arf-12-hours-prediction-task.zip -d ./git_ignore_folder/ds_data/
# 2. Configure environment variables in the `.env` file
dotenv set DS_LOCAL_DATA_PATH "$(pwd)/git_ignore_folder/ds_data"
dotenv set DS_CODER_ON_WHOLE_PIPELINE True
dotenv set DS_IF_USING_MLE_DATA False
dotenv set DS_SAMPLE_DATA_BY_LLM False
dotenv set DS_SCEN rdagent.scenarios.data_science.scen.DataScienceScen
# 3. run the application
rdagent data_science --competition arf-12-hours-prediction-task
```
**NOTE:** For more information about the dataset, please refer to the [documentation](https://rdagent.readthedocs.io/en/latest/scens/data_science.html).
- Run the **Automated Kaggle Model Tuning & Feature Engineering**: self-loop model proposal and feature engineering implementation application <br />
> Using **tabular-playground-series-dec-2021** as an example. <br />
> 1. Register and login on the [Kaggle](https://www.kaggle.com/) website. <br />
> 2. Configuring the Kaggle API. <br />
> (1) Click on the avatar (usually in the top right corner of the page) -> `Settings` -> `Create New Token`. A file called `kaggle.json` will be downloaded. <br />
> (2) Move `kaggle.json` to `~/.config/kaggle/` <br />
> (3) Modify the permissions of the kaggle.json file. Reference command: `chmod 600 ~/.config/kaggle/kaggle.json` <br />
> 3. Join the competition: Click `Join the competition` -> `I Understand and Accept` at the bottom of the [competition details page](https://www.kaggle.com/competitions/tabular-playground-series-dec-2021/data).
```bash
# Generally, you can run the Kaggle competition program with the following command:
rdagent data_science --competition <your competition name>
# 1. Configure environment variables in the `.env` file
mkdir -p ./git_ignore_folder/ds_data
dotenv set DS_LOCAL_DATA_PATH "$(pwd)/git_ignore_folder/ds_data"
dotenv set DS_CODER_ON_WHOLE_PIPELINE True
dotenv set DS_IF_USING_MLE_DATA True
dotenv set DS_SAMPLE_DATA_BY_LLM True
dotenv set DS_SCEN rdagent.scenarios.data_science.scen.KaggleScen
# 2. run the application
rdagent data_science --competition tabular-playground-series-dec-2021
```
### 🖥️ Monitor the Application Results
#### Streamlit UI
Use the Streamlit UI to view run logs, especially for the `data_science` scenario.
```sh
rdagent ui --port 19899 --log-dir <your log folder like "log/"> --data-science
```
About the `data_science` parameter: If you want to see the logs of the data science scenario, set the `data_science` parameter to `True`; otherwise set it to `False`.
#### Web UI
We also provide a separate web frontend in `web/` for the Flask backend started by `server_ui`.
**NOTE:** This web UI is different from `rdagent ui`. The current web UI does not support the `data_science` scenario yet. For the `data_science` scenario, please continue to use `rdagent ui --data-science`.
```sh
cd web
npm install
```
To build the frontend for the Flask backend, generate the static assets into the default directory used by `server_ui`:
```sh
cd web
npm run build:flask
```
By default, `server_ui` serves static files from `./git_ignore_folder/static`. If you need a different location, set the `UI_STATIC_PATH` environment variable before starting the backend.
Start the Flask backend and serve the built frontend together with the real-time APIs:
```sh
rdagent server_ui --port 19899
```
After that, open `http://127.0.0.1:19899` in your browser.
#### Common Notes
Port `19899` is used in the examples above. Before starting either UI, check whether this port is already occupied. If it is, please change it to another available port.
You can check whether the port is occupied by running:
```sh
rdagent health_check --no-check-env --no-check-docker
```
# 🏭 Scenarios
We have applied R&D-Agent to multiple valuable data-driven industrial scenarios.
## 🎯 Goal: Agent for Data-driven R&D
In this project, we are aiming to build an Agent to automate Data-Driven R\&D that can
+ 📄 Read real-world material (reports, papers, etc.) and **extract** key formulas and descriptions of the **features** and **models** of interest, which are the key components of data-driven R&D.
+ 🛠️ **Implement** the extracted formulas (e.g., features, factors, and models) as runnable code.
+ Because LLMs have limited ability to produce a correct implementation in a single attempt, build an evolving process for the agent to improve performance by learning from feedback and knowledge.
+ 💡 Propose **new ideas** based on current knowledge and observations.
<!--  -->
## 📈 Scenarios/Demos
In the two key areas of data-driven scenarios, model implementation and data building, our system aims to serve two main roles: 🦾Copilot and 🤖Agent.
- The 🦾Copilot follows human instructions to automate repetitive tasks.
- The 🤖Agent, being more autonomous, actively proposes ideas for better results in the future.
The supported scenarios are listed below:
| Scenario/Target | Model Implementation | Data Building |
| -- | -- | -- |
| **💹 Finance** | 🤖 [Iteratively Proposing Ideas & Evolving](https://rdagent.azurewebsites.net/model_loop)[▶️YouTube](https://www.youtube.com/watch?v=dm0dWL49Bc0&t=104s) | 🤖 [Iteratively Proposing Ideas & Evolving](https://rdagent.azurewebsites.net/factor_loop) [▶️YouTube](https://www.youtube.com/watch?v=X4DK2QZKaKY&t=6s) <br/> 🦾 [Auto reports reading & implementation](https://rdagent.azurewebsites.net/report_factor)[▶️YouTube](https://www.youtube.com/watch?v=ECLTXVcSx-c) |
| **🩺 Medical** | 🤖 [Iteratively Proposing Ideas & Evolving](https://rdagent.azurewebsites.net/dmm)[▶️YouTube](https://www.youtube.com/watch?v=VIaSTZuoZg4) | - |
| **🏭 General** | 🦾 [Auto paper reading & implementation](https://rdagent.azurewebsites.net/report_model)[▶️YouTube](https://www.youtube.com/watch?v=BiA2SfdKQ7o) <br/> 🤖 Auto Kaggle Model Tuning | 🤖Auto Kaggle feature Engineering |
- **[RoadMap](https://rdagent.readthedocs.io/en/latest/scens/data_science.html#roadmap)**: Currently, we are working hard to add new features to the Kaggle scenario.
Different scenarios vary in entrance and configuration. Please check the detailed setup tutorial in the scenarios documents.
Here is a gallery of [successful explorations](https://github.com/SunsetWolf/rdagent_resource/releases/download/demo_traces/demo_traces.zip) (5 traces shown in **[🖥️ Live Demo](https://rdagent.azurewebsites.net/)**). You can download and view the execution trace using [this command](https://github.com/microsoft/RD-Agent?tab=readme-ov-file#%EF%B8%8F-monitor-the-application-results) from the documentation.
Please refer to **[📖readthedocs_scen](https://rdagent.readthedocs.io/en/latest/scens/catalog.html)** for more details of the scenarios.
# ⚙️ Framework
<div align="center">
<img src="docs/_static/Framework-RDAgent.png" alt="Framework-RDAgent" width="85%">
</div>
Automating the R&D process in data science is a highly valuable yet underexplored area in industry. We propose a framework to push the boundaries of this important research field.
The research questions within this framework can be divided into three main categories:
| Research Area | Paper/Work List |
|--------------------|-----------------|
| **Benchmark the R&D abilities** | [Benchmark](#-benchmark) |
| **Idea proposal:** Explore new ideas or refine existing ones | [Research](#-research) |
| **Ability to realize ideas:** Implement and execute ideas | [Development](#%EF%B8%8F-development) |
We believe that the key to delivering high-quality solutions lies in the ability to evolve R&D capabilities. Agents should learn like human experts, continuously improving their R&D skills.
More documents can be found in the **[📖 readthedocs](https://rdagent.readthedocs.io/)**.
# 📃 Paper/Work list
## Overall Technical Report
- [R&D-Agent: An LLM-Agent Framework Towards Autonomous Data Science](https://arxiv.org/abs/2505.14738)
```BibTeX
@misc{yang2025rdagentllmagentframeworkautonomous,
title={R&D-Agent: An LLM-Agent Framework Towards Autonomous Data Science},
author={Xu Yang and Xiao Yang and Shikai Fang and Yifei Zhang and Jian Wang and Bowen Xian and Qizheng Li and Jingyuan Li and Minrui Xu and Yuante Li and Haoran Pan and Yuge Zhang and Weiqing Liu and Yelong Shen and Weizhu Chen and Jiang Bian},
year={2025},
eprint={2505.14738},
archivePrefix={arXiv},
primaryClass={cs.AI},
url={https://arxiv.org/abs/2505.14738},
}
```

## 📊 Benchmark
- [Towards Data-Centric Automatic R&D](https://arxiv.org/abs/2404.11276)
```BibTeX
@misc{chen2024datacentric,
title={Towards Data-Centric Automatic R&D},
author={Haotian Chen and Xinjie Shen and Zeqi Ye and Wenjun Feng and Haoxue Wang and Xiao Yang and Xu Yang and Weiqing Liu and Jiang Bian},
year={2024},
eprint={2404.11276},
archivePrefix={arXiv},
primaryClass={cs.AI}
}
```

## 🔍 Research
In a data mining expert's daily research and development process, they propose a hypothesis (e.g., a model structure like RNN can capture patterns in time-series data), design experiments (e.g., finance data contains time-series and we can verify the hypothesis in this scenario), implement the experiment as code (e.g., Pytorch model structure), and then execute the code to get feedback (e.g., metrics, loss curve, etc.). The experts learn from the feedback and improve in the next iteration.
Based on the principles above, we have established a basic method framework that continuously proposes hypotheses, verifies them, and gets feedback from real-world practice. This is the first scientific research automation framework that supports linking with real-world verification.
For more detail, please refer to our **[🖥️ Live Demo page](https://rdagent.azurewebsites.net)**.
## 🛠️ Development
- [Collaborative Evolving Strategy for Automatic Data-Centric Development](https://arxiv.org/abs/2407.18690)
```BibTeX
@misc{yang2024collaborative,
title={Collaborative Evolving Strategy for Automatic Data-Centric Development},
author={Xu Yang and Haotian Chen and Wenjun Feng and Haoxue Wang and Zeqi Ye and Xinjie Shen and Xiao Yang and Shizhao Sun and Weiqing Liu and Jiang Bian},
year={2024},
eprint={2407.18690},
archivePrefix={arXiv},
primaryClass={cs.AI}
}
```

## Deep Application in Diverse Scenarios
- [R&D-Agent-Quant: A Multi-Agent Framework for Data-Centric Factors and Model Joint Optimization](https://arxiv.org/abs/2505.15155)
```BibTeX
@misc{li2025rdagentquantmultiagentframeworkdatacentric,
title={R&D-Agent-Quant: A Multi-Agent Framework for Data-Centric Factors and Model Joint Optimization},
author={Yuante Li and Xu Yang and Xiao Yang and Minrui Xu and Xisen Wang and Weiqing Liu and Jiang Bian},
year={2025},
eprint={2505.15155},
archivePrefix={arXiv},
primaryClass={q-fin.CP},
url={https://arxiv.org/abs/2505.15155},
}
```

# 🤝 Contributing
We welcome contributions and suggestions to improve R&D-Agent. Please refer to the [Contributing Guide](CONTRIBUTING.md) for more details on how to contribute.
Before submitting a pull request, ensure that your code passes the automatic CI checks.
## 📝 Guidelines
This project welcomes contributions and suggestions.
Contributing to this project is straightforward and rewarding. Whether it's solving an issue, addressing a bug, enhancing documentation, or even correcting a typo, every contribution is valuable and helps improve R&D-Agent.
To get started, you can explore the issues list, or search for `TODO:` comments in the codebase by running the command `grep -r "TODO:"`.
<img src="https://img.shields.io/github/contributors-anon/microsoft/RD-Agent"/>
<a href="https://github.com/microsoft/RD-Agent/graphs/contributors">
<img src="https://contrib.rocks/image?repo=microsoft/RD-Agent&max=100&columns=15" />
</a>
Before we released R&D-Agent as an open-source project on GitHub, it was an internal project within our group. Unfortunately, the internal commit history was not preserved when we removed some confidential code. As a result, some contributions from our group members, including Haotian Chen, Wenjun Feng, Haoxue Wang, Zeqi Ye, Xinjie Shen, and Jinhui Li, were not included in the public commits.
# ⚖️ Legal disclaimer
<p style="line-height: 1; font-style: italic;">The RD-agent is provided “as is”, without warranty of any kind, express or implied, including but not limited to the warranties of merchantability, fitness for a particular purpose and noninfringement. The RD-agent is aimed to facilitate research and development process in the financial industry and not ready-to-use for any financial investment or advice. Users shall independently assess and test the risks of the RD-agent in a specific use scenario, ensure the responsible use of AI technology, including but not limited to developing and integrating risk mitigation measures, and comply with all applicable laws and regulations in all applicable jurisdictions. The RD-agent does not provide financial opinions or reflect the opinions of Microsoft, nor is it designed to replace the role of qualified financial professionals in formulating, assessing, and approving finance products. The inputs and outputs of the RD-agent belong to the users and users shall assume all liability under any theory of liability, whether in contract, torts, regulatory, negligence, products liability, or otherwise, associated with use of the RD-agent and any inputs and outputs thereof.</p>
================================================
FILE: SECURITY.md
================================================
<!-- BEGIN MICROSOFT SECURITY.MD V0.0.9 BLOCK -->
## Security
Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet) and [Xamarin](https://github.com/xamarin).
If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/security.md/definition), please report it to us as described below.
## Reporting Security Issues
**Please do not report security vulnerabilities through public GitHub issues.**
Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/security.md/msrc/create-report).
If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/security.md/msrc/pgp).
You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc).
Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
* Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
* Full paths of source file(s) related to the manifestation of the issue
* The location of the affected source code (tag/branch/commit or direct URL)
* Any special configuration required to reproduce the issue
* Step-by-step instructions to reproduce the issue
* Proof-of-concept or exploit code (if possible)
* Impact of the issue, including how an attacker might exploit the issue
This information will help us triage your report more quickly.
If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/security.md/msrc/bounty) page for more details about our active programs.
## Preferred Languages
We prefer all communications to be in English.
## Policy
Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/security.md/cvd).
<!-- END MICROSOFT SECURITY.MD BLOCK -->
================================================
FILE: SUPPORT.md
================================================
# TODO: The maintainer of this repo has not yet edited this file
**REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project?
- **No CSS support:** Fill out this template with information about how to file issues and get help.
- **Yes CSS support:** Fill out an intake form at [aka.ms/onboardsupport](https://aka.ms/onboardsupport). CSS will work with/help you to determine next steps.
- **Not sure?** Fill out an intake as though the answer were "Yes". CSS will help you decide.
*Then remove this first heading from this SUPPORT.MD file before publishing your repo.*
# Support
## How to file issues and get help
This project uses GitHub Issues to track bugs and feature requests. Please search the existing
issues before filing new issues to avoid duplicates. For new issues, file your bug or
feature request as a new Issue.
For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE
FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER
CHANNEL. WHERE WILL YOU HELP PEOPLE?**.
## Microsoft Support Policy
Support for this **PROJECT or PRODUCT** is limited to the resources listed above.
================================================
FILE: TODO.md
================================================
We encourage placing TODOs directly in the code, but some TODOs are more global,
so we place them here.
- [ ] Aligning the naming of files in components & scenarios.
- We would like to follow the same naming convention in components (reusable components for all scenarios) and scenarios (components for a specific scenario).
- But now we have the following mismatch:
- `coder` in `components` & `developer` in `components`
- [ ] The names of the folders do not match their contents.
- Why are scenarios in experiments?
================================================
FILE: constraints/3.10.txt
================================================
azure-identity==1.17.1
dill==0.3.9
pillow==10.4.0
psutil==6.1.0
scipy==1.14.1
================================================
FILE: constraints/3.11.txt
================================================
azure-identity==1.17.1
dill==0.3.9
pillow==10.4.0
psutil==6.1.0
scipy==1.14.1
================================================
FILE: docs/Makefile
================================================
# Minimal makefile for Sphinx documentation
#
# Every target other than `help` is forwarded to `$(SPHINXBUILD) -M <target>`
# by the catch-all pattern rule at the bottom of this file.
#
# You can set these variables from the command line, and also
# from the environment for the first two.
# SPHINXOPTS: extra options appended to every sphinx-build invocation
# (`?=` only assigns when the variable is not already defined).
SPHINXOPTS ?=
# SPHINXBUILD: the command used to run Sphinx; defaults to `sphinx-build`.
SPHINXBUILD ?= sphinx-build
# SOURCEDIR / BUILDDIR: the two directory arguments passed to sphinx-build.
SOURCEDIR = .
BUILDDIR = build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
# Declare `help` and `Makefile` as phony targets.
# NOTE(review): presumably this keeps the catch-all rule below from being
# applied to the Makefile itself -- confirm.
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
# `$@` expands to the requested target name, which becomes the -M builder.
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
================================================
FILE: docs/_static/RD2bench.json
================================================
{
"alpha053_15": {
"description": "Reversal class factor, negative delta of a ratio involving close, low, and high prices over 15 days.",
"formulation": "-1 \\times \\Delta\\left(\\frac{(\\text{close} - \\text{low}) - (\\text{high} - \\text{close})}{\\text{close} - \\text{low}}, 15\\right)",
"variables": {
"Delta(x, d)": "Change in 'x' over 'd' days.",
"text{close}": "Closing price of the stock.",
"text{low}": "Lowest price of the stock for the day.",
"text{high}": "Highest price of the stock for the day."
},
"Category": "Volume&Price",
"Difficulty": "Easy",
"gt_code": "import pandas as pd\ndata_pv = pd.read_hdf('daily_pv.h5')\nnew_df= data_pv.reset_index()\n# Calculate Alpha053\nnew_df['ratio'] = (new_df['$close'] - new_df['$low'] - (new_df['$high'] - new_df['$close'])) / (new_df['$close'] - new_df['$low'])\n# the change of ratio in new_df over the 15 days\nnew_df['result']=-new_df['ratio'].diff(15)\n# transfer the result to series\nresult=pd.DataFrame(new_df['result']).set_index(data_pv.index)\nresult=result['result']\nresult.to_hdf('result.h5', key='data')"
},
"liquidity_imbalance": {
"description": "liquidity_imbalance=std(minute trading liquidity_imbalance)/mean(minute trading liquidity_imbalance).",
"formulation": "liquidity_imbalance = \\frac{\\text{std}(\\text{minute trading liquidity_imbalance})}{\\text{mean}(\\text{minute liquidity_imbalance})}",
"variables": {
"std(minute liquidity_imbalance)": "Standard deviation of trading liquidity_imbalance for each minute of the trading day.",
"mean(minute liquidity_imbalance)": "Mean of trading liquidity_imbalance for each minute of the trading day.",
"liquidity_imbalance": "(bid_size-ask_size)/(bid_size+ask_size), we use something like bidV for the size"
},
"Category": "High-Frequency",
"Difficulty": "Medium",
"gt_code": "import pandas as pd\ndata_hf = pd.read_hdf('high_freq.h5')\nsample_df= data_hf.reset_index()\n# Convert 'datetime' column to datetime and extract date for grouping\nsample_df['date'] = sample_df['datetime'].dt.date\nsample_df['liquidity_imbalance']=(sample_df['bidV']-sample_df['askV'])/(sample_df['bidV']+sample_df['askV'])\n# Group by instrument and date\ngrouped = sample_df.groupby(['date','instrument'])['liquidity_imbalance']\n# Calculate mean and standard deviation of the volume for each group\nstats = grouped.agg(['mean', 'std'])\n# Calculate Z value for each instrument per day\nstats['liquidity_imbalance'] = stats['std'] / stats['mean']\n# Display the calculated Z values\nresult=stats['liquidity_imbalance']\nresult.index.names = ['datetime','instrument']\n# result = result.swaplevel().sort_index()\nresult.to_hdf('result.h5', key='data')"
},
"liquidity_imbalance_2": {
"description": "liquidity_imbalance=std(minute trading liquidity_imbalance)/mean(minute trading liquidity_imbalance).",
"formulation": "liquidity_imbalance = \\frac{\\text{std}(\\text{minute trading liquidity_imbalance})}{\\text{mean}(\\text{minute liquidity_imbalance})}",
"variables": {
"std(minute liquidity_imbalance)": "Standard deviation of trading liquidity_imbalance for each minute of the trading day.",
"mean(minute liquidity_imbalance)": "Mean of trading liquidity_imbalance for each minute of the trading day.",
"liquidity_imbalance": "(bid_size-ask_size)/(2*(bid_size+ask_size)), we use something like bidV for the size"
},
"Category": "High-Frequency",
"Difficulty": "Medium",
"gt_code": "import pandas as pd\ndata_hf = pd.read_hdf('high_freq.h5')\nsample_df= data_hf.reset_index()\n# Convert 'datetime' column to datetime and extract date for grouping\nsample_df['date'] = sample_df['datetime'].dt.date\nsample_df['liquidity_imbalance']=(sample_df['bidV']-sample_df['askV'])/((sample_df['bidV']+sample_df['askV'])*2)\n# Group by instrument and date\ngrouped = sample_df.groupby(['date','instrument'])['liquidity_imbalance']\n# Calculate mean and standard deviation of the volume for each group\nstats = grouped.agg(['mean', 'std'])\n# Calculate Z value for each instrument per day\nstats['liquidity_imbalance'] = stats['std'] / stats['mean']\n# Display the calculated Z values\nresult=stats['liquidity_imbalance']\nresult.index.names = ['datetime','instrument']\n# result = result.swaplevel().sort_index()\nresult.to_hdf('result.h5', key='data')"
},
"liquidity_imbalance_3": {
"description": "liquidity_imbalance=std(minute trading liquidity_imbalance)/mean(minute trading liquidity_imbalance).",
"formulation": "liquidity_imbalance = \\frac{\\text{std}(\\text{minute trading liquidity_imbalance})}{\\text{mean}(\\text{minute liquidity_imbalance})}",
"variables": {
"std(minute liquidity_imbalance)": "Standard deviation of trading liquidity_imbalance for each minute of the trading day.",
"mean(minute liquidity_imbalance)": "Mean of trading liquidity_imbalance for each minute of the trading day.",
"liquidity_imbalance": "(bid_size-ask_size)/(3*(bid_size+ask_size)), we use something like bidV for the size"
},
"Category": "High-Frequency",
"Difficulty": "Medium",
"gt_code": "import pandas as pd\ndata_hf = pd.read_hdf('high_freq.h5')\nsample_df= data_hf.reset_index()\n# Convert 'datetime' column to datetime and extract date for grouping\nsample_df['date'] = sample_df['datetime'].dt.date\nsample_df['liquidity_imbalance']=(sample_df['bidV']-sample_df['askV'])/((sample_df['bidV']+sample_df['askV'])*3)\n# Group by instrument and date\ngrouped = sample_df.groupby(['date','instrument'])['liquidity_imbalance']\n# Calculate mean and standard deviation of the volume for each group\nstats = grouped.agg(['mean', 'std'])\n# Calculate Z value for each instrument per day\nstats['liquidity_imbalance'] = stats['std'] / stats['mean']\n# Display the calculated Z values\nresult=stats['liquidity_imbalance']\nresult.index.names = ['datetime','instrument']\n# result = result.swaplevel().sort_index()\nresult.to_hdf('result.h5', key='data')"
},
"micro_price": {
"description": "micro_price=std(minute trading micro_price)/mean(minute trading micro_price).",
"formulation": "micro_price = \\frac{\\text{std}(\\text{minute trading micro_price})}{\\text{mean}(\\text{minute micro_price})}",
"variables": {
"std(minute micro_price)": "Standard deviation of trading micro_price for each minute of the trading day.",
"mean(minute micro_price)": "Mean of trading micro_price for each minute of the trading day.",
"micro_price": "((df['bid_price'] * df['ask_size']) + (df['ask_price'] * df['bid_size'])) / (df['bid_size'] + df['ask_size'])"
},
"Category": "High-Frequency",
"Difficulty": "Hard",
"gt_code": "import pandas as pd\ndata_hf = pd.read_hdf('high_freq.h5')\nsample_df= data_hf.reset_index()\n# Convert 'datetime' column to datetime and extract date for grouping\nsample_df['date'] = sample_df['datetime'].dt.date\nsample_df['micro_price']=(sample_df['bid']*sample_df['askV']+sample_df['ask']*sample_df['bidV'])/(sample_df['bidV']+sample_df['askV'])\n# Group by instrument and date\ngrouped = sample_df.groupby(['date','instrument'])['micro_price']\n# Calculate mean and standard deviation of the volume for each group\nstats = grouped.agg(['mean', 'std'])\n# Calculate Z value for each instrument per day\nstats['micro_price'] = stats['std'] / stats['mean']\n# Display the calculated Z values\nresult=stats['micro_price']\nresult.index.names = ['datetime','instrument']\n# result = result.swaplevel().sort_index()\nresult.to_hdf('result.h5', key='data')"
},
"micro_price_2": {
"description": "micro_price_2=std(minute trading micro_price)/mean(minute trading micro_price).",
"formulation": "micro_price_2 = \\frac{\\text{std}(\\text{minute trading micro_price})}{\\text{mean}(\\text{minute micro_price})}",
"variables": {
"std(minute micro_price)": "Standard deviation of trading micro_price for each minute of the trading day.",
"mean(minute micro_price)": "Mean of trading micro_price for each minute of the trading day.",
"micro_price": "((df['bid_price'] * df['ask_size']) + (df['ask_price'] * df['bid_size'])) / (2*(df['bid_size'] + df['ask_size'])), we use something like bidV for the size"
},
"Category": "High-Frequency",
"Difficulty": "Hard",
"gt_code": "import pandas as pd\ndata_hf = pd.read_hdf('high_freq.h5')\nsample_df= data_hf.reset_index()\n# Convert 'datetime' column to datetime and extract date for grouping\nsample_df['date'] = sample_df['datetime'].dt.date\nsample_df['micro_price']=(sample_df['bid']*sample_df['askV']+sample_df['ask']*sample_df['bidV'])/((sample_df['bidV']+sample_df['askV'])*2)\n# Group by instrument and date\ngrouped = sample_df.groupby(['date','instrument'])['micro_price']\n# Calculate mean and standard deviation of the volume for each group\nstats = grouped.agg(['mean', 'std'])\n# Calculate Z value for each instrument per day\nstats['micro_price'] = stats['std'] / stats['mean']\n# Display the calculated Z values\nresult=stats['micro_price']\nresult.index.names = ['datetime','instrument']\n# result = result.swaplevel().sort_index()\nresult.to_hdf('result.h5', key='data')"
},
"micro_price_3": {
"description": "micro_price_3=std(minute trading micro_price)/mean(minute trading micro_price).",
"formulation": "micro_price_3 = \\frac{\\text{std}(\\text{minute trading micro_price})}{\\text{mean}(\\text{minute micro_price})}",
"variables": {
"std(minute micro_price)": "Standard deviation of trading micro_price for each minute of the trading day.",
"mean(minute micro_price)": "Mean of trading micro_price for each minute of the trading day.",
"micro_price": "((df['bid_price'] * df['ask_size']) + (df['ask_price'] * df['bid_size'])) / (3*(df['bid_size'] + df['ask_size'])), we use something like bidV for the size"
},
"Category": "High-Frequency",
"Difficulty": "Hard",
"gt_code": "import pandas as pd\ndata_hf = pd.read_hdf('high_freq.h5')\nsample_df= data_hf.reset_index()\n# Convert 'datetime' column to datetime and extract date for grouping\nsample_df['date'] = sample_df['datetime'].dt.date\nsample_df['micro_price']=(sample_df['bid']*sample_df['askV']+sample_df['ask']*sample_df['bidV'])/((sample_df['bidV']+sample_df['askV'])*3)\n# Group by instrument and date\ngrouped = sample_df.groupby(['date','instrument'])['micro_price']\n# Calculate mean and standard deviation of the volume for each group\nstats = grouped.agg(['mean', 'std'])\n# Calculate Z value for each instrument per day\nstats['micro_price'] = stats['std'] / stats['mean']\n# Display the calculated Z values\nresult=stats['micro_price']\nresult.index.names = ['datetime','instrument']\n# result = result.swaplevel().sort_index()\nresult.to_hdf('result.h5', key='data')"
},
"mid_price": {
"description": "mid_price=std(minute trading mid_price)/mean(minute trading mid_price).",
"formulation": "mid_price = \\frac{\\text{std}(\\text{minute trading mid price})}{\\text{mean}(\\text{minute mid price})}",
"variables": {
"std(minute mid_price)": "Standard deviation of trading mid_price for each minute of the trading day.",
"mean(minute mid_price)": "Mean of trading mid_price for each minute of the trading day.",
"mid_price": "The average of the bid and ask prices."
},
"Category": "High-Frequency",
"Difficulty": "Easy",
"gt_code": "import pandas as pd\ndata_hf = pd.read_hdf('high_freq.h5')\nsample_df= data_hf.reset_index()\n# Convert 'datetime' column to datetime and extract date for grouping\nsample_df['date'] = sample_df['datetime'].dt.date\nsample_df['mid_price']=(sample_df['bid']+sample_df['ask'])/2\n# Group by instrument and date\ngrouped = sample_df.groupby(['date','instrument'])['mid_price']\n# Calculate mean and standard deviation of the volume for each group\nstats = grouped.agg(['mean', 'std'])\nstats['mid_price'] = stats['std'] / stats['mean']\nresult=stats['mid_price']\nresult.index.names = ['datetime','instrument']\n# result = result.swaplevel().sort_index()\nresult.to_hdf('result.h5', key='data')"
},
"mid_price_2": {
"description": "mid_price=std(minute trading mid_price)/mean(minute trading mid_price).",
"formulation": "mid_price = \\frac{\\text{std}(\\text{minute trading mid price})}{\\text{mean}(\\text{minute mid price})}",
"variables": {
"std(minute mid_price)": "Standard deviation of trading mid_price for each minute of the trading day.",
"mean(minute mid_price)": "Mean of trading mid_price for each minute of the trading day.",
"mid_price_2": "the average of the bid and ask prices plus the average of the bid and ask size (bidV and askV)."
},
"Category": "High-Frequency",
"Difficulty": "Easy",
"gt_code": "import pandas as pd\ndata_hf = pd.read_hdf('high_freq.h5')\nsample_df= data_hf.reset_index()\n# Convert 'datetime' column to datetime and extract date for grouping\nsample_df['date'] = sample_df['datetime'].dt.date\nsample_df['mid_price']=(sample_df['bid']+sample_df['ask'])/2+(sample_df['bidV']+sample_df['askV'])/2\n# Group by instrument and date\ngrouped = sample_df.groupby(['date','instrument'])['mid_price']\n# Calculate mean and standard deviation of the volume for each group\nstats = grouped.agg(['mean', 'std'])\nstats['mid_price'] = stats['std'] / stats['mean']\nresult=stats['mid_price']\nresult.index.names = ['datetime','instrument']\n# result = result.swaplevel().sort_index()\nresult.to_hdf('result.h5', key='data')"
},
"mid_price_3": {
"description": "mid_price=std(minute trading mid_price)/mean(minute trading mid_price).",
"formulation": "mid_price = \\frac{\\text{std}(\\text{minute trading mid price})}{\\text{mean}(\\text{minute mid price})}",
"variables": {
"std(minute mid_price)": "Standard deviation of trading mid_price for each minute of the trading day.",
"mean(minute mid_price)": "Mean of trading mid_price for each minute of the trading day.",
"mid_price_3": "The coefficient of variation (CV) of the mid-price for each minute of the trading day, calculated as the standard deviation of the mid-price divided by the mean mid-price."
},
"Category": "High-Frequency",
"Difficulty": "Easy",
"gt_code": "import pandas as pd\ndata_hf = pd.read_hdf('high_freq.h5')\nsample_df= data_hf.reset_index()\n# Convert 'datetime' column to datetime and extract date for grouping\nsample_df['date'] = sample_df['datetime'].dt.date\nsample_df['mid_price']=(sample_df['bid']+sample_df['ask'])/3\n# Group by instrument and date\ngrouped = sample_df.groupby(['date','instrument'])['mid_price']\n# Calculate mean and standard deviation of the volume for each group\nstats = grouped.agg(['mean', 'std'])\nstats['mid_price'] = stats['std'] / stats['mean']\nresult=stats['mid_price']\nresult.index.names = ['datetime','instrument']\n# result = result.swaplevel().sort_index()\nresult.to_hdf('result.h5', key='data')"
},
"PB_ROE": {
"description": "Constructed using the ranking difference between PB and ROE, with regression versions of PB and ROE replacing original PB and ROE to obtain reconstructed factor values.",
"formulation": "\\text{rank}(PB_t) - \\text{rank}(ROE_t)",
"variables": {
"text{rank}(PB_t)": "Ranking of regression version PB on cross-section at time t.",
"text{rank}(ROE_t)": "Ranking of regression version single-quarter ROE on cross-section at time t."
},
"Category": "Fundamentals",
"Difficulty": "Easy",
"gt_code": "import pandas as pd\ndata_f = pd.read_hdf('daily_f.h5')\ndata = data_f.reset_index()\n# Calculate the rank of PB and ROE\ndata['PB_rank'] = data.groupby('datetime')['B/P'].rank()\ndata['ROE_rank'] = data.groupby('datetime')['ROE'].rank()\n# Calculate the difference between the ranks\ndata['PB_ROE'] = data['PB_rank'] - data['ROE_rank']\n# set the datetime and instrument as index and drop the original index\nresult=pd.DataFrame(data['PB_ROE']).set_index(data_f.index)\n# transfer the result to series\nresult=result['PB_ROE']\nresult.to_hdf('result.h5', key='data')"
},
"PB_ROE_2": {
"description": "Constructed using the ranking difference between PB/2 and ROE, with regression versions of PB and ROE replacing original PB and ROE to obtain reconstructed factor values.",
"formulation": "\\text{rank}(PB_t)/2 - \\text{rank}(ROE_t)",
"variables": {
"text{rank}(PB_t)": "Ranking of regression version PB on cross-section at time t.",
"text{rank}(ROE_t)": "Ranking of regression version single-quarter ROE on cross-section at time t."
},
"Category": "Fundamentals",
"Difficulty": "Easy",
"gt_code": "import pandas as pd\ndata_f = pd.read_hdf('daily_f.h5')\ndata = data_f.reset_index()\n# Calculate the rank of PB and ROE\ndata['PB_rank'] = data.groupby('datetime')['B/P'].rank()\ndata['ROE_rank'] = data.groupby('datetime')['ROE'].rank()\n# Calculate the difference between the ranks\ndata['PB_ROE'] = data['PB_rank']/2 - data['ROE_rank']\n# set the datetime and instrument as index and drop the original index\nresult=pd.DataFrame(data['PB_ROE']).set_index(data_f.index)\n# transfer the result to series\nresult=result['PB_ROE']\nresult.to_hdf('result.h5', key='data')"
},
"PB_ROE_3": {
"description": "Constructed using the ranking difference between PB/3 and ROE, with regression versions of PB and ROE replacing original PB and ROE to obtain reconstructed factor values.",
"formulation": "\\text{rank}(PB_t)/3 - \\text{rank}(ROE_t)",
"variables": {
"text{rank}(PB_t)": "Ranking of regression version PB on cross-section at time t.",
"text{rank}(ROE_t)": "Ranking of regression version single-quarter ROE on cross-section at time t."
},
"Category": "Fundamentals",
"Difficulty": "Easy",
"gt_code": "import pandas as pd\ndata_f = pd.read_hdf('daily_f.h5')\ndata = data_f.reset_index()\n# Calculate the rank of PB and ROE\ndata['PB_rank'] = data.groupby('datetime')['B/P'].rank()\ndata['ROE_rank'] = data.groupby('datetime')['ROE'].rank()\n# Calculate the difference between the ranks\ndata['PB_ROE'] = data['PB_rank']/3 - data['ROE_rank']\n# set the datetime and instrument as index and drop the original index\nresult=pd.DataFrame(data['PB_ROE']).set_index(data_f.index)\n# transfer the result to series\nresult=result['PB_ROE']\nresult.to_hdf('result.h5', key='data')"
},
"PB_ROE_movement": {
"description": "PB_ROE_movement=five day PB_ROE movement indicator(-1 and 1 or 0).",
"formulation": "PB_ROE_movement = 5_day_movement(PB_ROE), PB_ROE = \\text{rank}(PB_t) - \\text{rank}(ROE_t)",
"variables": {
"PB_ROE": "the ranking difference between PB and ROE.",
"5_day_PB_ROE_movement": "1 if PB_ROE is higher than the PB_ROE 5 days ago, -1 if PB_ROE is lower than the PB_ROE 5 days ago, 0 if PB_ROE is the same as the PB_ROE 5 days ago.",
"text{rank}(PB_t)": "Ranking of regression version PB on cross-section at time t.",
"text{rank}(ROE_t)": "Ranking of regression version single-quarter ROE on cross-section at time t."
},
"Category": "Fundamentals",
"Difficulty": "Hard",
"gt_code": "import pandas as pd\ndata_f = pd.read_hdf('daily_f.h5')\nsample_df = data_f.reset_index()\n# Calculate the rank of PB and ROE\nsample_df['PB_rank'] = sample_df.groupby('datetime')['B/P'].rank()\nsample_df['ROE_rank'] = sample_df.groupby('datetime')['ROE'].rank()\nsample_df['PB_ROE'] = sample_df['PB_rank'] - sample_df['ROE_rank']\n# Group by instrument and date\nsample_df['PB_ROE_movement'] = sample_df['PB_ROE'].diff(periods=5).apply(lambda x: 1 if x > 0 else (-1 if x < 0 else 0))\n#calculate the mid_price_movement ratio for each day\n# set the datetime and instrument as index and drop the original index\nresult=pd.DataFrame(sample_df['PB_ROE_movement']).set_index(data_f.index)\n# transfer the result to series\nresult=result['PB_ROE_movement']\nresult.to_hdf('result.h5', key='data')"
},
"PB_ROE_movement_10": {
"description": "PB_ROE_movement=10 days PB_ROE movement indicator(-1 and 1 or 0).",
"formulation": "PB_ROE_movement = 10_day_movement(PB_ROE), PB_ROE = \\text{rank}(PB_t) - \\text{rank}(ROE_t)",
"variables": {
"PB_ROE": "the ranking difference between PB and ROE.",
"10_day_PB_ROE_movement": "1 if PB_ROE is higher than the PB_ROE 10 days ago, -1 if PB_ROE is lower than the PB_ROE 10 days ago, 0 if PB_ROE is the same as the PB_ROE 10 days ago.",
"text{rank}(PB_t)": "Ranking of regression version PB on cross-section at time t.",
"text{rank}(ROE_t)": "Ranking of regression version single-quarter ROE on cross-section at time t."
},
"Category": "Fundamentals",
"Difficulty": "Hard",
"gt_code": "import pandas as pd\ndata_f = pd.read_hdf('daily_f.h5')\nsample_df = data_f.reset_index()\n# Calculate the rank of PB and ROE\nsample_df['PB_rank'] = sample_df.groupby('datetime')['B/P'].rank()\nsample_df['ROE_rank'] = sample_df.groupby('datetime')['ROE'].rank()\nsample_df['PB_ROE'] = sample_df['PB_rank'] - sample_df['ROE_rank']\n# Group by instrument and date\nsample_df['PB_ROE_movement'] = sample_df['PB_ROE'].diff(periods=10).apply(lambda x: 1 if x > 0 else (-1 if x < 0 else 0))\n#calculate the mid_price_movement ratio for each day\n# set the datetime and instrument as index and drop the original index\nresult=pd.DataFrame(sample_df['PB_ROE_movement']).set_index(data_f.index)\n# transfer the result to series\nresult=result['PB_ROE_movement']\nresult.to_hdf('result.h5', key='data')"
},
"PB_ROE_movement_20": {
"description": "PB_ROE_movement=20 days PB_ROE movement indicator(-1 and 1 or 0).",
"formulation": "PB_ROE_movement = 20_day_movement(PB_ROE), PB_ROE = \\text{rank}(PB_t) - \\text{rank}(ROE_t)",
"variables": {
"PB_ROE": "the ranking difference between PB and ROE.",
"20_day_PB_ROE_movement": "1 if PB_ROE is higher than the PB_ROE 20 days ago, -1 if PB_ROE is lower than the PB_ROE 20 days ago, 0 if PB_ROE is the same as the PB_ROE 20 days ago.",
"text{rank}(PB_t)": "Ranking of regression version PB on cross-section at time t.",
"text{rank}(ROE_t)": "Ranking of regression version single-quarter ROE on cross-section at time t."
},
"Category": "Fundamentals",
"Difficulty": "Hard",
"gt_code": "import pandas as pd\ndata_f = pd.read_hdf('daily_f.h5')\nsample_df = data_f.reset_index()\n# Calculate the rank of PB and ROE\nsample_df['PB_rank'] = sample_df.groupby('datetime')['B/P'].rank()\nsample_df['ROE_rank'] = sample_df.groupby('datetime')['ROE'].rank()\nsample_df['PB_ROE'] = sample_df['PB_rank'] - sample_df['ROE_rank']\n# Group by instrument and date\nsample_df['PB_ROE_movement'] = sample_df['PB_ROE'].diff(periods=20).apply(lambda x: 1 if x > 0 else (-1 if x < 0 else 0))\n#calculate the mid_price_movement ratio for each day\n# set the datetime and instrument as index and drop the original index\nresult=pd.DataFrame(sample_df['PB_ROE_movement']).set_index(data_f.index)\n# transfer the result to series\nresult=result['PB_ROE_movement']\nresult.to_hdf('result.h5', key='data')"
},
"ROE_movement": {
"description": "ROE_movement=five day ROE movement indicator(-1 and 1 or 0).",
"formulation": "ROE_movement = 5_day_movement(ROE)",
"variables": {
"ROE": "ROE in fundamental statistics.",
"5_day_ROE_movement": "1 if ROE is higher than the ROE 5 days ago, -1 if ROE is lower than the ROE 5 days ago, 0 if ROE is the same as the ROE 5 days ago."
},
"Category": "Fundamentals",
"Difficulty": "Medium",
"gt_code": "import pandas as pd\ndata_f = pd.read_hdf('daily_f.h5')\nsample_df = data_f.reset_index()\n# Group by instrument and date\nsample_df['ROE_movement'] = sample_df['ROE'].diff(periods=5).apply(lambda x: 1 if x > 0 else (-1 if x < 0 else 0))\n#calculate the mid_price_movement ratio for each day\n# set the datetime and instrument as index and drop the original index\nresult=pd.DataFrame(sample_df['ROE_movement']).set_index(data_f.index)\n# transfer the result to series\nresult=result['ROE_movement']\nresult.to_hdf('result.h5', key='data')"
},
"ROE_movement_10": {
"description": "ROE_movement_10=ten day ROE movement indicator(-1 and 1 or 0).",
"formulation": "ROE_movement = 10_day_movement(ROE)",
"variables": {
"ROE": "ROE in fundamental statistics.",
"10_day_ROE_movement": "1 if ROE is higher than the ROE 10 days ago, -1 if ROE is lower than the ROE 10 days ago, 0 if ROE is the same as the ROE 10 days ago."
},
"Category": "Fundamentals",
"Difficulty": "Medium",
"gt_code": "import pandas as pd\ndata_f = pd.read_hdf('daily_f.h5')\nsample_df = data_f.reset_index()\n# Group by instrument and date\nsample_df['ROE_movement'] = sample_df['ROE'].diff(periods=10).apply(lambda x: 1 if x > 0 else (-1 if x < 0 else 0))\n#calculate the mid_price_movement ratio for each day\n# set the datetime and instrument as index and drop the original index\nresult=pd.DataFrame(sample_df['ROE_movement']).set_index(data_f.index)\n# transfer the result to series\nresult=result['ROE_movement']\nresult.to_hdf('result.h5', key='data')"
},
"ROE_movement_20": {
"description": "ROE_movement_20=20 day ROE movement indicator(-1 and 1 or 0).",
"formulation": "ROE_movement_20 = 20_day_movement(ROE)",
"variables": {
"ROE": "ROE in fundamental statistics.",
"20_day_ROE_movement": "1 if ROE is higher than the ROE 20 days ago, -1 if ROE is lower than the ROE 20 days ago, 0 if ROE is the same as the ROE 20 days ago."
},
"Category": "Fundamentals",
"Difficulty": "Medium",
"gt_code": "import pandas as pd\ndata_f = pd.read_hdf('daily_f.h5')\nsample_df = data_f.reset_index()\n# Group by instrument and date\nsample_df['ROE_movement'] = sample_df['ROE'].diff(periods=20).apply(lambda x: 1 if x > 0 else (-1 if x < 0 else 0))\n#calculate the mid_price_movement ratio for each day\n# set the datetime and instrument as index and drop the original index\nresult=pd.DataFrame(sample_df['ROE_movement']).set_index(data_f.index)\n# transfer the result to series\nresult=result['ROE_movement']\nresult.to_hdf('result.h5', key='data')"
},
"alpha_pv_diff": {
"description": "alpha_pv_diff is defined as the ratio of the difference between close prices 10 days change and open prices 10 days change to the sum of the highest minus lowest prices plus a small constant.",
"formulation": "frac{(text{close_diff10} - text{open_diff10})}{(text{high} - text{low} + 0.001)}",
"variables": {
"close": "Closing price of the stock",
"open": "Opening price of the stock",
"high": "Highest price of the stock during the day",
"low": "Lowest price of the stock during the day"
},
"Category": "Volume&Price",
"Difficulty": "Medium",
"gt_code": "import pandas as pd\ndata_pv = pd.read_hdf('daily_pv.h5')\nnew_df= data_pv.reset_index()\n# Calculate Alpha101\nnew_df['result'] = (new_df['$close'].diff(10) - new_df['$open'].diff(10)) / (new_df['$high'] - new_df['$low'] + 0.001)\n# keep the index of the original dataframe\nresult=pd.DataFrame(new_df['result']).set_index(data_pv.index)\n# transfer the result to series\nresult=result['result']\nresult.to_hdf('result.h5', key='data')"
},
"alpha_pv_diff_15": {
"description": "alpha_pv_diff is defined as the ratio of the difference between close prices 15 days change and open prices 15 days change to the sum of the highest minus lowest prices plus a small constant.",
"formulation": "frac{(text{close_diff15} - text{open_diff15})}{(text{high} - text{low} + 0.001)}",
"variables": {
"close": "Closing price of the stock",
"open": "Opening price of the stock",
"high": "Highest price of the stock during the day",
"low": "Lowest price of the stock during the day"
},
"Category": "Volume&Price",
"Difficulty": "Medium",
"gt_code": "import pandas as pd\ndata_pv = pd.read_hdf('daily_pv.h5')\nnew_df= data_pv.reset_index()\n# Calculate Alpha101\nnew_df['result'] = (new_df['$close'].diff(15) - new_df['$open'].diff(15)) / (new_df['$high'] - new_df['$low'] + 0.001)\n# keep the index of the original dataframe\nresult=pd.DataFrame(new_df['result']).set_index(data_pv.index)\n# transfer the result to series\nresult=result['result']\nresult.to_hdf('result.h5', key='data')"
},
"alpha_pv_diff_20": {
"description": "alpha_pv_diff is defined as the ratio of the difference between close prices 20 days change and open prices 20 days change to the sum of the highest minus lowest prices plus a small constant.",
"formulation": "frac{(text{close_diff20} - text{open_diff20})}{(text{high} - text{low} + 0.001)}",
"variables": {
"close": "Closing price of the stock",
"open": "Opening price of the stock",
"high": "Highest price of the stock during the day",
"low": "Lowest price of the stock during the day"
},
"Category": "Volume&Price",
"Difficulty": "Medium",
"gt_code": "import pandas as pd\ndata_pv = pd.read_hdf('daily_pv.h5')\nnew_df= data_pv.reset_index()\n# Calculate Alpha101\nnew_df['result'] = (new_df['$close'].diff(20) - new_df['$open'].diff(20)) / (new_df['$high'] - new_df['$low'] + 0.001)\n# keep the index of the original dataframe\nresult=pd.DataFrame(new_df['result']).set_index(data_pv.index)\n# transfer the result to series\nresult=result['result']\nresult.to_hdf('result.h5', key='data')"
},
"alpha_pv_diff_pct": {
"description": "alpha_pv is defined as the ratio of the differe
gitextract_5wquvenf/
├── .bumpversion.cfg
├── .commitlintrc.js
├── .devcontainer/
│ ├── Dockerfile
│ ├── README.md
│ ├── devcontainer.json
│ └── env
├── .github/
│ ├── FUNDING.yml
│ ├── ISSUE_TEMPLATE/
│ │ ├── bug-report.md
│ │ ├── documentation.md
│ │ ├── feature-request.md
│ │ └── question.md
│ ├── PULL_REQUEST_TEMPLATE.md
│ ├── dependabot.yml
│ └── workflows/
│ ├── ci.yml
│ ├── pr.yml
│ ├── readthedocs-preview.yml
│ └── release.yml
├── .gitignore
├── .readthedocs.yaml
├── .streamlit/
│ └── config.toml
├── CHANGELOG.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── Makefile
├── README.md
├── SECURITY.md
├── SUPPORT.md
├── TODO.md
├── constraints/
│ ├── 3.10.txt
│ └── 3.11.txt
├── docs/
│ ├── Makefile
│ ├── _static/
│ │ └── RD2bench.json
│ ├── api_reference.rst
│ ├── changelog.md
│ ├── conf.py
│ ├── development.rst
│ ├── index.rst
│ ├── installation_and_configuration.rst
│ ├── introduction.rst
│ ├── make.bat
│ ├── policy.rst
│ ├── project_framework_introduction.rst
│ ├── requirements.txt
│ ├── research/
│ │ ├── benchmark.rst
│ │ ├── catalog.rst
│ │ └── dev.rst
│ ├── scens/
│ │ ├── catalog.rst
│ │ ├── data_agent_fin.rst
│ │ ├── data_copilot_fin.rst
│ │ ├── data_science.rst
│ │ ├── finetune.rst
│ │ ├── model_agent_fin.rst
│ │ ├── model_copilot_general.rst
│ │ └── quant_agent_fin.rst
│ └── ui.rst
├── pyproject.toml
├── rdagent/
│ ├── app/
│ │ ├── CI/
│ │ │ ├── README.md
│ │ │ ├── ci.ipynb
│ │ │ ├── prompts.yaml
│ │ │ └── run.py
│ │ ├── benchmark/
│ │ │ ├── factor/
│ │ │ │ ├── analysis.py
│ │ │ │ └── eval.py
│ │ │ └── model/
│ │ │ ├── README.md
│ │ │ └── eval.py
│ │ ├── cli.py
│ │ ├── data_science/
│ │ │ ├── conf.py
│ │ │ ├── debug.py
│ │ │ └── loop.py
│ │ ├── finetune/
│ │ │ ├── data_science/
│ │ │ │ ├── conf.py
│ │ │ │ ├── loop.py
│ │ │ │ ├── scen.py
│ │ │ │ └── tpl/
│ │ │ │ ├── components/
│ │ │ │ │ └── coder/
│ │ │ │ │ └── data_science/
│ │ │ │ │ └── pipeline/
│ │ │ │ │ └── prompts.yaml
│ │ │ │ └── scenarios/
│ │ │ │ └── data_science/
│ │ │ │ └── proposal/
│ │ │ │ └── exp_gen/
│ │ │ │ └── prompts_v2.yaml
│ │ │ ├── llm/
│ │ │ │ ├── README.md
│ │ │ │ ├── conf.py
│ │ │ │ ├── job/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── run_ft_job.sh
│ │ │ │ │ ├── scenarios.json
│ │ │ │ │ └── tasks.json.example
│ │ │ │ ├── loop.py
│ │ │ │ └── ui/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── app.py
│ │ │ │ ├── benchmarks/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── base.py
│ │ │ │ │ ├── bioprobench.py
│ │ │ │ │ ├── chemcotbench.py
│ │ │ │ │ ├── financeiq.py
│ │ │ │ │ ├── panorama.py
│ │ │ │ │ └── tablebench.py
│ │ │ │ ├── components.py
│ │ │ │ ├── config.py
│ │ │ │ ├── data_loader.py
│ │ │ │ └── ft_summary.py
│ │ │ └── share/
│ │ │ ├── eval.py
│ │ │ └── prompts.yaml
│ │ ├── general_model/
│ │ │ └── general_model.py
│ │ ├── kaggle/
│ │ │ ├── conf.py
│ │ │ └── loop.py
│ │ ├── qlib_rd_loop/
│ │ │ ├── conf.py
│ │ │ ├── factor.py
│ │ │ ├── factor_from_report.py
│ │ │ ├── model.py
│ │ │ ├── prompts.yaml
│ │ │ └── quant.py
│ │ ├── rl/
│ │ │ ├── conf.py
│ │ │ ├── loop.py
│ │ │ └── ui/
│ │ │ ├── __init__.py
│ │ │ ├── app.py
│ │ │ ├── components.py
│ │ │ ├── config.py
│ │ │ ├── data_loader.py
│ │ │ └── rl_summary.py
│ │ └── utils/
│ │ ├── ape.py
│ │ ├── health_check.py
│ │ ├── info.py
│ │ ├── prompts.yaml
│ │ ├── ws.py
│ │ └── ws_ft.py
│ ├── components/
│ │ ├── agent/
│ │ │ ├── __init__.py
│ │ │ ├── base.py
│ │ │ ├── context7/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── conf.py
│ │ │ │ └── prompts.yaml
│ │ │ ├── mcp/
│ │ │ │ └── __init__.py
│ │ │ └── rag/
│ │ │ ├── __init__.py
│ │ │ └── conf.py
│ │ ├── benchmark/
│ │ │ ├── __init__.py
│ │ │ ├── conf.py
│ │ │ ├── configs/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── models.yaml
│ │ │ │ └── opencompass_template.yaml
│ │ │ ├── eval_method.py
│ │ │ ├── example.json
│ │ │ └── utils.py
│ │ ├── coder/
│ │ │ ├── CoSTEER/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── config.py
│ │ │ │ ├── evaluators.py
│ │ │ │ ├── evolvable_subjects.py
│ │ │ │ ├── evolving_strategy.py
│ │ │ │ ├── knowledge_management.py
│ │ │ │ ├── prompts.yaml
│ │ │ │ └── task.py
│ │ │ ├── data_science/
│ │ │ │ ├── conf.py
│ │ │ │ ├── ensemble/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── conf.py
│ │ │ │ │ ├── eval.py
│ │ │ │ │ ├── eval_tests/
│ │ │ │ │ │ └── ensemble_test.txt
│ │ │ │ │ ├── exp.py
│ │ │ │ │ ├── prompts.yaml
│ │ │ │ │ └── test.py
│ │ │ │ ├── feature/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── eval.py
│ │ │ │ │ ├── eval_tests/
│ │ │ │ │ │ └── feature_test.txt
│ │ │ │ │ ├── exp.py
│ │ │ │ │ ├── prompts.yaml
│ │ │ │ │ └── test.py
│ │ │ │ ├── model/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── eval.py
│ │ │ │ │ ├── eval_tests/
│ │ │ │ │ │ └── model_test.txt
│ │ │ │ │ ├── exp.py
│ │ │ │ │ ├── prompts.yaml
│ │ │ │ │ └── test.py
│ │ │ │ ├── pipeline/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── eval.py
│ │ │ │ │ ├── eval_tests/
│ │ │ │ │ │ └── submission_format_test.txt
│ │ │ │ │ ├── exp.py
│ │ │ │ │ └── prompts.yaml
│ │ │ │ ├── raw_data_loader/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── conf.py
│ │ │ │ │ ├── eval.py
│ │ │ │ │ ├── eval_tests/
│ │ │ │ │ │ └── data_loader_test.txt
│ │ │ │ │ ├── exp.py
│ │ │ │ │ ├── prompts.yaml
│ │ │ │ │ └── test.py
│ │ │ │ ├── share/
│ │ │ │ │ ├── doc.py
│ │ │ │ │ ├── ds_costeer.py
│ │ │ │ │ ├── eval.py
│ │ │ │ │ ├── notebook.py
│ │ │ │ │ ├── prompts.yaml
│ │ │ │ │ └── util.py
│ │ │ │ ├── utils.py
│ │ │ │ └── workflow/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── eval.py
│ │ │ │ ├── eval_tests/
│ │ │ │ │ └── submission_format_test.txt
│ │ │ │ ├── exp.py
│ │ │ │ ├── prompts.yaml
│ │ │ │ └── test.py
│ │ │ ├── factor_coder/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── config.py
│ │ │ │ ├── eva_utils.py
│ │ │ │ ├── evaluators.py
│ │ │ │ ├── evolving_strategy.py
│ │ │ │ ├── factor.py
│ │ │ │ ├── factor_execution_template.txt
│ │ │ │ └── prompts.yaml
│ │ │ ├── finetune/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── conf.py
│ │ │ │ ├── eval.py
│ │ │ │ ├── exp.py
│ │ │ │ ├── prompts.yaml
│ │ │ │ └── unified_validator.py
│ │ │ ├── model_coder/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── benchmark/
│ │ │ │ │ ├── eval.py
│ │ │ │ │ ├── gt_code/
│ │ │ │ │ │ ├── A-DGN.py
│ │ │ │ │ │ ├── dirgnn.py
│ │ │ │ │ │ ├── gpsconv.py
│ │ │ │ │ │ ├── linkx.py
│ │ │ │ │ │ ├── pmlp.py
│ │ │ │ │ │ └── visnet.py
│ │ │ │ │ └── model_dict.json
│ │ │ │ ├── conf.py
│ │ │ │ ├── eva_utils.py
│ │ │ │ ├── evaluators.py
│ │ │ │ ├── evolving_strategy.py
│ │ │ │ ├── gt_code.py
│ │ │ │ ├── model.py
│ │ │ │ ├── model_execute_template_v1.txt
│ │ │ │ ├── model_execute_template_v2.txt
│ │ │ │ ├── one_shot/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── prompt.yaml
│ │ │ │ ├── prompts.yaml
│ │ │ │ └── task_loader.py
│ │ │ └── rl/
│ │ │ ├── __init__.py
│ │ │ ├── costeer.py
│ │ │ └── prompts.yaml
│ │ ├── document_reader/
│ │ │ └── document_reader.py
│ │ ├── interactor/
│ │ │ └── __init__.py
│ │ ├── knowledge_management/
│ │ │ ├── graph.py
│ │ │ └── vector_base.py
│ │ ├── loader/
│ │ │ ├── experiment_loader.py
│ │ │ └── task_loader.py
│ │ ├── proposal/
│ │ │ ├── __init__.py
│ │ │ └── prompts.yaml
│ │ ├── runner/
│ │ │ └── __init__.py
│ │ └── workflow/
│ │ ├── conf.py
│ │ └── rd_loop.py
│ ├── core/
│ │ ├── conf.py
│ │ ├── developer.py
│ │ ├── evaluation.py
│ │ ├── evolving_agent.py
│ │ ├── evolving_framework.py
│ │ ├── exception.py
│ │ ├── experiment.py
│ │ ├── interactor.py
│ │ ├── knowledge_base.py
│ │ ├── prompts.py
│ │ ├── proposal.py
│ │ ├── scenario.py
│ │ └── utils.py
│ ├── log/
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── conf.py
│ │ ├── logger.py
│ │ ├── mle_summary.py
│ │ ├── server/
│ │ │ ├── README.md
│ │ │ ├── app.py
│ │ │ └── debug_app.py
│ │ ├── storage.py
│ │ ├── timer.py
│ │ ├── ui/
│ │ │ ├── __init__.py
│ │ │ ├── aide.py
│ │ │ ├── app.py
│ │ │ ├── conf.py
│ │ │ ├── ds_summary.py
│ │ │ ├── ds_trace.py
│ │ │ ├── ds_user_interact.py
│ │ │ ├── dsapp.py
│ │ │ ├── llm_st.py
│ │ │ ├── qlib_report_figure.py
│ │ │ ├── st_fixed_container.py
│ │ │ ├── storage.py
│ │ │ ├── utils.py
│ │ │ └── web.py
│ │ └── utils/
│ │ ├── __init__.py
│ │ └── folder.py
│ ├── oai/
│ │ ├── backend/
│ │ │ ├── __init__.py
│ │ │ ├── base.py
│ │ │ ├── deprec.py
│ │ │ ├── litellm.py
│ │ │ └── pydantic_ai.py
│ │ ├── llm_conf.py
│ │ ├── llm_utils.py
│ │ └── utils/
│ │ └── embedding.py
│ ├── scenarios/
│ │ ├── data_science/
│ │ │ ├── __init__.py
│ │ │ ├── debug/
│ │ │ │ └── data.py
│ │ │ ├── dev/
│ │ │ │ ├── feedback.py
│ │ │ │ ├── prompts.yaml
│ │ │ │ └── runner/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── eval.py
│ │ │ │ └── prompts.yaml
│ │ │ ├── eval_tests/
│ │ │ │ └── mle_submission_format_test.txt
│ │ │ ├── example/
│ │ │ │ ├── README.md
│ │ │ │ ├── arf-12-hours-prediction-task/
│ │ │ │ │ ├── description.md
│ │ │ │ │ └── sample.py
│ │ │ │ ├── eval/
│ │ │ │ │ ├── arf-12-hours-prediction-task/
│ │ │ │ │ │ ├── grade.py
│ │ │ │ │ │ └── valid.py
│ │ │ │ │ └── playground-series-s4e9/
│ │ │ │ │ ├── grade.py
│ │ │ │ │ └── valid.py
│ │ │ │ ├── playground-series-s4e9/
│ │ │ │ │ └── description.md
│ │ │ │ └── source_data/
│ │ │ │ ├── arf-12-hours-prediction-task/
│ │ │ │ │ └── prepare.py
│ │ │ │ └── playground-series-s4e9/
│ │ │ │ └── prepare.py
│ │ │ ├── experiment/
│ │ │ │ ├── __init__.py
│ │ │ │ └── experiment.py
│ │ │ ├── interactor/
│ │ │ │ └── __init__.py
│ │ │ ├── loop.py
│ │ │ ├── proposal/
│ │ │ │ ├── __init__.py
│ │ │ │ └── exp_gen/
│ │ │ │ ├── README.md
│ │ │ │ ├── __init__.py
│ │ │ │ ├── base.py
│ │ │ │ ├── diversity_strategy.py
│ │ │ │ ├── draft/
│ │ │ │ │ ├── draft.py
│ │ │ │ │ └── prompts_draft.yaml
│ │ │ │ ├── idea_pool.py
│ │ │ │ ├── merge.py
│ │ │ │ ├── merge.yaml
│ │ │ │ ├── naive.py
│ │ │ │ ├── naive.yaml
│ │ │ │ ├── package_info.py
│ │ │ │ ├── planner/
│ │ │ │ │ └── __init__.py
│ │ │ │ ├── prompts.yaml
│ │ │ │ ├── prompts_v2.yaml
│ │ │ │ ├── proposal.py
│ │ │ │ ├── router/
│ │ │ │ │ └── __init__.py
│ │ │ │ ├── select/
│ │ │ │ │ ├── expand.py
│ │ │ │ │ ├── prompts.yaml
│ │ │ │ │ └── submit.py
│ │ │ │ ├── trace_scheduler.py
│ │ │ │ └── utils.py
│ │ │ ├── scen/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── prompts.yaml
│ │ │ │ └── utils.py
│ │ │ ├── share.yaml
│ │ │ ├── sing_docker/
│ │ │ │ ├── Dockerfile
│ │ │ │ ├── entrypoint.sh
│ │ │ │ └── kaggle_environment.yaml
│ │ │ └── test_eval.py
│ │ ├── finetune/
│ │ │ ├── benchmark/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── benchmark.py
│ │ │ │ ├── configs/
│ │ │ │ │ ├── models.yaml
│ │ │ │ │ └── opencompass_template.yaml
│ │ │ │ ├── data/
│ │ │ │ │ ├── adaptor.py
│ │ │ │ │ ├── default.py
│ │ │ │ │ └── financeiq_gen.py
│ │ │ │ └── merge/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── merge.py
│ │ │ │ └── merge_model_template.yaml
│ │ │ ├── datasets/
│ │ │ │ ├── README.md
│ │ │ │ ├── __init__.py
│ │ │ │ ├── bioprobench/
│ │ │ │ │ └── README.md
│ │ │ │ ├── chemcot/
│ │ │ │ │ ├── README.md
│ │ │ │ │ └── __init__.py
│ │ │ │ ├── deepscaler/
│ │ │ │ │ └── README.md
│ │ │ │ ├── financeiq/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ └── split.py
│ │ │ │ ├── panorama/
│ │ │ │ │ └── README.md
│ │ │ │ └── tableinstruct/
│ │ │ │ └── README.md
│ │ │ ├── dev/
│ │ │ │ ├── feedback.py
│ │ │ │ └── prompts.yaml
│ │ │ ├── download/
│ │ │ │ ├── __init__.py
│ │ │ │ └── hf.py
│ │ │ ├── env/
│ │ │ │ ├── conda/
│ │ │ │ │ ├── deepspeed/
│ │ │ │ │ │ ├── ds_z2_config.json
│ │ │ │ │ │ └── ds_z3_config.json
│ │ │ │ │ ├── llm_finetune_requirements.txt
│ │ │ │ │ └── opencompass_requirements.txt
│ │ │ │ └── docker/
│ │ │ │ ├── llm_finetune/
│ │ │ │ │ └── Dockerfile
│ │ │ │ └── opencompass/
│ │ │ │ └── Dockerfile
│ │ │ ├── experiment/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── experiment.py
│ │ │ │ └── workspace.py
│ │ │ ├── loop.py
│ │ │ ├── proposal/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── prompts.yaml
│ │ │ │ ├── proposal.py
│ │ │ │ └── trace.py
│ │ │ ├── scen/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── docker_scripts/
│ │ │ │ │ └── extract_parameters.py
│ │ │ │ ├── llama_factory_manager.py
│ │ │ │ ├── memory_estimator.py
│ │ │ │ ├── prompts.yaml
│ │ │ │ ├── scenario.py
│ │ │ │ └── utils.py
│ │ │ ├── share.yaml
│ │ │ ├── train/
│ │ │ │ ├── eval.py
│ │ │ │ └── runner.py
│ │ │ └── utils.py
│ │ ├── general_model/
│ │ │ ├── prompts.yaml
│ │ │ └── scenario.py
│ │ ├── kaggle/
│ │ │ ├── README.md
│ │ │ ├── developer/
│ │ │ │ ├── coder.py
│ │ │ │ ├── feedback.py
│ │ │ │ └── runner.py
│ │ │ ├── docker/
│ │ │ │ ├── DS_docker/
│ │ │ │ │ └── Dockerfile
│ │ │ │ ├── kaggle_docker/
│ │ │ │ │ └── Dockerfile
│ │ │ │ └── mle_bench_docker/
│ │ │ │ └── Dockerfile
│ │ │ ├── experiment/
│ │ │ │ ├── README.md
│ │ │ │ ├── kaggle_experiment.py
│ │ │ │ ├── prompts.yaml
│ │ │ │ ├── scenario.py
│ │ │ │ ├── spaceship-titanic_template/
│ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ ├── feature/
│ │ │ │ │ │ └── feature.py
│ │ │ │ │ ├── model/
│ │ │ │ │ │ ├── model_randomforest.py
│ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ ├── select_nn.py
│ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ └── train.py
│ │ │ │ ├── templates/
│ │ │ │ │ ├── covid19-global-forecasting-week-1/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── digit-recognizer/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_nn.py
│ │ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ │ ├── select_nn.py
│ │ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── feedback-prize-english-language-learning/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_randomforest.py
│ │ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── forest-cover-type-prediction/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_randomforest.py
│ │ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ │ ├── select_nn.py
│ │ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ ├── train.py
│ │ │ │ │ │ └── train_past.py
│ │ │ │ │ ├── meta_tpl_deprecated/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_nn.py
│ │ │ │ │ │ │ ├── model_randomforest.py
│ │ │ │ │ │ │ └── model_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── new-york-city-taxi-fare-prediction/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_linear.py
│ │ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ │ ├── select_linear.py
│ │ │ │ │ │ │ ├── select_nn.py
│ │ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── optiver-realized-volatility-prediction/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_randomforest.py
│ │ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ │ ├── select_nn.py
│ │ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── playground-series-s3e11/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_randomforest.py
│ │ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ │ ├── select_nn.py
│ │ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── playground-series-s3e14/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_randomforest.py
│ │ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ │ ├── select_nn.py
│ │ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── playground-series-s3e16/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_randomforest.py
│ │ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ │ ├── select_nn.py
│ │ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── playground-series-s3e26/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_randomforest.py
│ │ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ │ ├── select_nn.py
│ │ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── playground-series-s4e5/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_randomforest.py
│ │ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ │ ├── select_nn.py
│ │ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── playground-series-s4e8/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_randomforest.py
│ │ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ │ ├── select_nn.py
│ │ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── playground-series-s4e9/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_randomforest.py
│ │ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ │ ├── select_nn.py
│ │ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── sf-crime/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_randomforest.py
│ │ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ │ ├── select_nn.py
│ │ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── spaceship-titanic/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_randomforest.py
│ │ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ │ ├── select_nn.py
│ │ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── statoil-iceberg-classifier-challenge/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── tabular-playground-series-dec-2021/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_randomforest.py
│ │ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ │ ├── select_nn.py
│ │ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── tabular-playground-series-may-2022/
│ │ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ │ ├── feature/
│ │ │ │ │ │ │ └── feature.py
│ │ │ │ │ │ ├── model/
│ │ │ │ │ │ │ ├── model_randomforest.py
│ │ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ │ ├── select_nn.py
│ │ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ │ └── train.py
│ │ │ │ │ └── ventilator-pressure-prediction/
│ │ │ │ │ ├── fea_share_preprocess.py
│ │ │ │ │ ├── feature/
│ │ │ │ │ │ └── feature.py
│ │ │ │ │ ├── model/
│ │ │ │ │ │ ├── model_randomforest.py
│ │ │ │ │ │ ├── model_xgboost.py
│ │ │ │ │ │ ├── select_lightgbm.py
│ │ │ │ │ │ ├── select_nn.py
│ │ │ │ │ │ ├── select_randomforest.py
│ │ │ │ │ │ └── select_xgboost.py
│ │ │ │ │ └── train.py
│ │ │ │ ├── utils.py
│ │ │ │ └── workspace.py
│ │ │ ├── kaggle_crawler.py
│ │ │ ├── knowledge_management/
│ │ │ │ ├── README.md
│ │ │ │ ├── extract_knowledge.py
│ │ │ │ ├── graph.py
│ │ │ │ ├── prompts.yaml
│ │ │ │ └── vector_base.py
│ │ │ ├── prompts.yaml
│ │ │ ├── proposal/
│ │ │ │ └── proposal.py
│ │ │ └── tpl_ex/
│ │ │ ├── aerial-cactus-identification/
│ │ │ │ ├── README.md
│ │ │ │ ├── ensemble.py
│ │ │ │ ├── feature.py
│ │ │ │ ├── load_data.py
│ │ │ │ ├── main.py
│ │ │ │ ├── model01.py
│ │ │ │ └── spec/
│ │ │ │ ├── data_loader.md
│ │ │ │ ├── ensemble.md
│ │ │ │ ├── feature.md
│ │ │ │ ├── model.md
│ │ │ │ └── workflow.md
│ │ │ └── meta/
│ │ │ └── spec.md
│ │ ├── qlib/
│ │ │ ├── developer/
│ │ │ │ ├── factor_coder.py
│ │ │ │ ├── factor_runner.py
│ │ │ │ ├── feedback.py
│ │ │ │ ├── model_coder.py
│ │ │ │ ├── model_runner.py
│ │ │ │ └── utils.py
│ │ │ ├── docker/
│ │ │ │ └── Dockerfile
│ │ │ ├── experiment/
│ │ │ │ ├── factor_data_template/
│ │ │ │ │ ├── README.md
│ │ │ │ │ └── generate.py
│ │ │ │ ├── factor_experiment.py
│ │ │ │ ├── factor_from_report_experiment.py
│ │ │ │ ├── factor_template/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── conf_baseline.yaml
│ │ │ │ │ ├── conf_combined_factors.yaml
│ │ │ │ │ ├── conf_combined_factors_sota_model.yaml
│ │ │ │ │ └── read_exp_res.py
│ │ │ │ ├── model_experiment.py
│ │ │ │ ├── model_template/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── conf_baseline_factors_model.yaml
│ │ │ │ │ ├── conf_sota_factors_model.yaml
│ │ │ │ │ └── read_exp_res.py
│ │ │ │ ├── prompts.yaml
│ │ │ │ ├── quant_experiment.py
│ │ │ │ ├── utils.py
│ │ │ │ └── workspace.py
│ │ │ ├── factor_experiment_loader/
│ │ │ │ ├── json_loader.py
│ │ │ │ ├── pdf_loader.py
│ │ │ │ └── prompts.yaml
│ │ │ ├── prompts.yaml
│ │ │ └── proposal/
│ │ │ ├── bandit.py
│ │ │ ├── factor_proposal.py
│ │ │ ├── model_proposal.py
│ │ │ └── quant_proposal.py
│ │ ├── rl/
│ │ │ ├── autorl_bench/
│ │ │ │ ├── .gitignore
│ │ │ │ ├── README.md
│ │ │ │ ├── __init__.py
│ │ │ │ ├── agents/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── claude/
│ │ │ │ │ │ ├── AGENTS.md
│ │ │ │ │ │ ├── config.yaml
│ │ │ │ │ │ └── start.sh
│ │ │ │ │ ├── codex/
│ │ │ │ │ │ ├── AGENTS.md
│ │ │ │ │ │ ├── config.yaml
│ │ │ │ │ │ └── start.sh
│ │ │ │ │ ├── example_agent/
│ │ │ │ │ │ ├── config.yaml
│ │ │ │ │ │ ├── start.sh
│ │ │ │ │ │ └── train.py
│ │ │ │ │ ├── gemini/
│ │ │ │ │ │ ├── AGENTS.md
│ │ │ │ │ │ ├── config.yaml
│ │ │ │ │ │ └── start.sh
│ │ │ │ │ ├── opencode/
│ │ │ │ │ │ ├── config.yaml
│ │ │ │ │ │ └── start.sh
│ │ │ │ │ ├── openhands/
│ │ │ │ │ │ ├── config.yaml
│ │ │ │ │ │ └── start.sh
│ │ │ │ │ ├── rdagent/
│ │ │ │ │ │ ├── config.yaml
│ │ │ │ │ │ └── start.sh
│ │ │ │ │ └── registry.py
│ │ │ │ ├── benchmarks/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── alfworld/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── base_config.yaml
│ │ │ │ │ │ ├── data.py
│ │ │ │ │ │ ├── description.md
│ │ │ │ │ │ ├── eval.py
│ │ │ │ │ │ ├── react_prompts.json
│ │ │ │ │ │ └── requirements.txt
│ │ │ │ │ ├── alpacaeval/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── annotators_gpt52_fn/
│ │ │ │ │ │ │ ├── alpaca_eval_fn.txt
│ │ │ │ │ │ │ ├── annotations_seed0_configs.json
│ │ │ │ │ │ │ └── configs.yaml
│ │ │ │ │ │ ├── data.py
│ │ │ │ │ │ ├── description.md
│ │ │ │ │ │ ├── eval.py
│ │ │ │ │ │ └── requirements.txt
│ │ │ │ │ ├── deepsearchqa/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── data.py
│ │ │ │ │ │ ├── description.md
│ │ │ │ │ │ └── eval.py
│ │ │ │ │ ├── gsm8k/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── data.py
│ │ │ │ │ │ └── description.md
│ │ │ │ │ ├── humaneval/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ ├── data.py
│ │ │ │ │ │ ├── description.md
│ │ │ │ │ │ └── requirements.txt
│ │ │ │ │ ├── smith/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ └── per_sample_eval.py
│ │ │ │ │ └── webshop/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── data.py
│ │ │ │ │ ├── description.md
│ │ │ │ │ ├── eval.py
│ │ │ │ │ └── requirements.txt
│ │ │ │ ├── conf.py
│ │ │ │ ├── core/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── evaluator.py
│ │ │ │ │ ├── instructions.md
│ │ │ │ │ ├── metrics.py
│ │ │ │ │ ├── opencompass.py
│ │ │ │ │ ├── server.py
│ │ │ │ │ ├── skill.md
│ │ │ │ │ ├── ui.py
│ │ │ │ │ └── utils.py
│ │ │ │ ├── requirements.txt
│ │ │ │ ├── run.py
│ │ │ │ └── test/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── test_benchmark.py
│ │ │ │ └── test_fixes.py
│ │ │ ├── dev/
│ │ │ │ ├── feedback.py
│ │ │ │ └── prompts.yaml
│ │ │ ├── env/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── conf.py
│ │ │ │ └── docker/
│ │ │ │ ├── base/
│ │ │ │ │ └── Dockerfile
│ │ │ │ ├── evalplus/
│ │ │ │ │ └── Dockerfile
│ │ │ │ ├── gsm8k/
│ │ │ │ │ └── Dockerfile
│ │ │ │ └── miniwob/
│ │ │ │ └── Dockerfile
│ │ │ ├── experiment/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── experiment.py
│ │ │ │ └── workspace.py
│ │ │ ├── loop.py
│ │ │ ├── proposal/
│ │ │ │ ├── prompts.yaml
│ │ │ │ ├── proposal.py
│ │ │ │ └── trace.py
│ │ │ ├── scen/
│ │ │ │ └── scenario.py
│ │ │ └── train/
│ │ │ └── runner.py
│ │ └── shared/
│ │ ├── get_runtime_info.py
│ │ └── runtime_info.py
│ └── utils/
│ ├── __init__.py
│ ├── agent/
│ │ ├── __init__.py
│ │ ├── apply_patch.py
│ │ ├── ret.py
│ │ ├── tpl.py
│ │ ├── tpl.yaml
│ │ └── workflow.py
│ ├── blob/
│ │ ├── azsync.sh
│ │ └── gen_token.sh
│ ├── env.py
│ ├── fmt.py
│ ├── prompts.yaml
│ ├── qlib.py
│ ├── repo/
│ │ ├── README.md
│ │ ├── diff.py
│ │ └── repo_utils.py
│ └── workflow/
│ ├── __init__.py
│ ├── loop.py
│ ├── misc.py
│ └── tracking.py
├── requirements/
│ ├── docs.txt
│ ├── lint.txt
│ ├── package.txt
│ ├── test.txt
│ └── torch.txt
├── requirements.txt
├── test/
│ ├── finetune/
│ │ ├── test_benchmark.py
│ │ ├── test_benchmark_api.py
│ │ └── test_benchmark_tablebench.py
│ ├── notebook/
│ │ ├── test_notebook_converter.py
│ │ ├── test_util.py
│ │ └── testfiles/
│ │ ├── main.ipynb
│ │ ├── main.py
│ │ ├── main2.ipynb
│ │ ├── main2.py
│ │ ├── main_missing_main_fn.py
│ │ └── main_missing_sections.py
│ ├── oai/
│ │ ├── test_advanced.py
│ │ ├── test_base.py
│ │ ├── test_completion.py
│ │ ├── test_embedding_and_similarity.py
│ │ ├── test_llm_connectivity.py
│ │ ├── test_prefect_cache.py
│ │ └── test_pydantic.py
│ ├── qlib/
│ │ └── test_model_factor_proposal.py
│ ├── rl/
│ │ └── __init__.py
│ └── utils/
│ ├── README.md
│ ├── coder/
│ │ ├── test_CoSTEER.py
│ │ └── test_finetune_coder.py
│ ├── test_agent_infra.py
│ ├── test_conf.py
│ ├── test_env.py
│ ├── test_import.py
│ ├── test_kaggle.py
│ ├── test_misc.py
│ └── test_ws.py
└── web/
├── .gitignore
├── README.md
├── auto-imports.d.ts
├── components.d.ts
├── index.html
├── package.json
├── src/
│ ├── App.vue
│ ├── common/
│ │ ├── code-theme.css
│ │ ├── py-theme.css
│ │ └── reset.css
│ ├── components/
│ │ ├── chartBox.vue
│ │ ├── code.vue
│ │ ├── development.vue
│ │ ├── dialog.vue
│ │ ├── feedback.vue
│ │ ├── footer.vue
│ │ ├── kateX.vue
│ │ ├── lineChart.vue
│ │ ├── lineChartOne.vue
│ │ ├── loading-dot.vue
│ │ ├── loading.vue
│ │ ├── loop-component.vue
│ │ ├── markdown.vue
│ │ ├── markdownToHtml.vue
│ │ ├── navBar.vue
│ │ ├── research.vue
│ │ ├── saveImage.vue
│ │ ├── select-component.vue
│ │ ├── sm-select-component.vue
│ │ ├── step-component.vue
│ │ ├── svgIcon.vue
│ │ ├── swiper.vue
│ │ └── upload-progress.vue
│ ├── constants/
│ │ ├── mle-competitions.js
│ │ └── qlib.js
│ ├── main.ts
│ ├── router/
│ │ └── index.ts
│ ├── shims-vue.d.ts
│ ├── style.css
│ ├── utils/
│ │ ├── api.js
│ │ ├── crypto.js
│ │ ├── getAssets.ts
│ │ ├── request.js
│ │ └── snap.svg-min.js
│ ├── views/
│ │ ├── Home.vue
│ │ ├── Login.vue
│ │ ├── Playground.vue
│ │ ├── Playground1.vue
│ │ ├── PlaygroundPage.vue
│ │ └── ResultPage.vue
│ └── vite-env.d.ts
├── tsconfig.json
├── tsconfig.node.json
└── vite.config.ts
Showing preview only (280K chars total). Download the full file or copy to clipboard to get everything.
SYMBOL INDEX (3037 symbols across 506 files)
FILE: rdagent/app/CI/run.py
class CIError (line 43) | class CIError:
method to_dict (line 53) | def to_dict(self) -> dict[str, object]:
method __str__ (line 56) | def __str__(self) -> str:
class CIFeedback (line 61) | class CIFeedback(Feedback):
method statistics (line 64) | def statistics(self) -> dict[Literal["ruff", "mypy"], dict[str, int]]:
class FixRecord (line 73) | class FixRecord:
method to_dict (line 79) | def to_dict(self) -> dict[str, Any]:
class CodeFile (line 90) | class CodeFile:
method __init__ (line 91) | def __init__(self, path: Path | str) -> None:
method add_line_number (line 96) | def add_line_number(cls: CodeFile, code: list[str] | str, start: int =...
method remove_line_number (line 107) | def remove_line_number(cls: CodeFile, code: list[str] | str) -> list[s...
method load (line 117) | def load(self) -> None:
method get (line 126) | def get(
method apply_changes (line 159) | def apply_changes(self, changes: list[tuple[int, int, str]]) -> None:
method get_code_blocks (line 182) | def get_code_blocks(self, max_lines: int = 30) -> list[tuple[int, int]]:
method __str__ (line 214) | def __str__(self) -> str:
class Repo (line 218) | class Repo(EvolvableSubjects):
method __init__ (line 219) | def __init__(self, project_path: Path | str, excludes: list[Path] | No...
class RuffRule (line 255) | class RuffRule:
class RuffEvaluator (line 282) | class RuffEvaluator(Evaluator):
method __init__ (line 287) | def __init__(self, command: str | None = None) -> None:
method explain_rule (line 294) | def explain_rule(error_code: str) -> RuffRule:
method evaluate (line 307) | def evaluate(self, evo: Repo, **kwargs: dict) -> CIFeedback:
class MypyEvaluator (line 358) | class MypyEvaluator(Evaluator):
method __init__ (line 359) | def __init__(self, command: str | None = None) -> None:
method evaluate (line 365) | def evaluate(self, evo: Repo, **kwargs: dict) -> CIFeedback:
class MultiEvaluator (line 413) | class MultiEvaluator(Evaluator):
method __init__ (line 414) | def __init__(self, *evaluators: Evaluator) -> None:
method evaluate (line 417) | def evaluate(self, evo: Repo, **kwargs: dict) -> CIFeedback:
class CIEvoStr (line 431) | class CIEvoStr(EvolvingStrategy):
method evolve (line 432) | def evolve( # noqa: C901, PLR0912, PLR0915
class CIEvoAgent (line 696) | class CIEvoAgent(EvoAgent):
method __init__ (line 697) | def __init__(self, evolving_strategy: CIEvoStr) -> None:
method multistep_evolve (line 701) | def multistep_evolve(self, evo: Repo, eva: Evaluator) -> Repo:
FILE: rdagent/app/benchmark/factor/analysis.py
class BenchmarkAnalyzer (line 15) | class BenchmarkAnalyzer:
method __init__ (line 16) | def __init__(self, settings, only_correct_format=False):
method load_index_map (line 21) | def load_index_map(self):
method load_data (line 29) | def load_data(self, file_path):
method process_results (line 39) | def process_results(self, results):
method reformat_index (line 48) | def reformat_index(self, display_df):
method result_all_key_order (line 81) | def result_all_key_order(self, x):
method analyze_data (line 96) | def analyze_data(self, sum_df):
class Plotter (line 178) | class Plotter:
method change_fs (line 180) | def change_fs(font_size):
method plot_data (line 190) | def plot_data(data, file_name, title):
function main (line 204) | def main(
FILE: rdagent/app/cli.py
function ui (line 45) | def ui(port=19899, log_dir="", debug: bool = False, data_science: bool =...
function server_ui (line 65) | def server_ui(port=19899):
function ds_user_interact (line 74) | def ds_user_interact(port=19900):
function fin_factor_cli (line 83) | def fin_factor_cli(
function fin_model_cli (line 94) | def fin_model_cli(
function fin_quant_cli (line 105) | def fin_quant_cli(
function fin_factor_report_cli (line 116) | def fin_factor_report_cli(
function general_model_cli (line 126) | def general_model_cli(report_file_path: str):
function data_science_cli (line 131) | def data_science_cli(
function llm_finetune_cli (line 150) | def llm_finetune_cli(
function grade_summary_cli (line 177) | def grade_summary_cli(log_folder: str):
function health_check_cli (line 186) | def health_check_cli(
function collect_info_cli (line 195) | def collect_info_cli():
FILE: rdagent/app/data_science/conf.py
class DataScienceBasePropSetting (line 9) | class DataScienceBasePropSetting(KaggleBasePropSetting):
FILE: rdagent/app/data_science/loop.py
function main (line 13) | def main(
FILE: rdagent/app/finetune/data_science/conf.py
class DSFinetuneScen (line 9) | class DSFinetuneScen(ExtendedBaseSettings):
function update_settings (line 30) | def update_settings(competition: str):
FILE: rdagent/app/finetune/data_science/loop.py
function main (line 13) | def main(
FILE: rdagent/app/finetune/data_science/scen.py
class DSFinetuneScen (line 11) | class DSFinetuneScen(DataScienceScen):
method _get_data_folder_description (line 14) | def _get_data_folder_description(self) -> str:
FILE: rdagent/app/finetune/llm/conf.py
class LLMFinetunePropSetting (line 8) | class LLMFinetunePropSetting(ExtendedBaseSettings):
FILE: rdagent/app/finetune/llm/loop.py
function main (line 17) | def main(
FILE: rdagent/app/finetune/llm/ui/app.py
function get_job_options (line 28) | def get_job_options(base_path: Path) -> list[str]:
function main (line 66) | def main():
FILE: rdagent/app/finetune/llm/ui/benchmarks/__init__.py
function get_core_metric_score (line 22) | def get_core_metric_score(benchmark_name: str, accuracy_summary: dict) -...
FILE: rdagent/app/finetune/llm/ui/benchmarks/base.py
class BenchmarkProcessor (line 6) | class BenchmarkProcessor(ABC):
method match (line 28) | def match(cls, benchmark_name: str) -> bool:
method get_core_metric (line 34) | def get_core_metric(cls, accuracy_summary: dict) -> tuple[str, float, ...
method is_higher_better (line 49) | def is_higher_better(cls, metric_name: str) -> bool:
FILE: rdagent/app/finetune/llm/ui/benchmarks/bioprobench.py
class BioProBenchProcessor (line 6) | class BioProBenchProcessor(BenchmarkProcessor):
method match (line 17) | def match(cls, benchmark_name: str) -> bool:
method get_core_metric (line 21) | def get_core_metric(cls, accuracy_summary: dict) -> tuple[str, float, ...
FILE: rdagent/app/finetune/llm/ui/benchmarks/chemcotbench.py
class ChemCotBenchProcessor (line 6) | class ChemCotBenchProcessor(BenchmarkProcessor):
method match (line 43) | def match(cls, benchmark_name: str) -> bool:
method get_core_metric (line 47) | def get_core_metric(cls, accuracy_summary: dict) -> tuple[str, float, ...
FILE: rdagent/app/finetune/llm/ui/benchmarks/financeiq.py
class FinanceIQProcessor (line 6) | class FinanceIQProcessor(BenchmarkProcessor):
method match (line 10) | def match(cls, benchmark_name: str) -> bool:
method get_core_metric (line 14) | def get_core_metric(cls, accuracy_summary: dict) -> tuple[str, float, ...
FILE: rdagent/app/finetune/llm/ui/benchmarks/panorama.py
class PanoramaProcessor (line 6) | class PanoramaProcessor(BenchmarkProcessor):
method match (line 16) | def match(cls, benchmark_name: str) -> bool:
method get_core_metric (line 20) | def get_core_metric(cls, accuracy_summary: dict) -> tuple[str, float, ...
FILE: rdagent/app/finetune/llm/ui/benchmarks/tablebench.py
class TableBenchProcessor (line 6) | class TableBenchProcessor(BenchmarkProcessor):
method match (line 24) | def match(cls, benchmark_name: str) -> bool:
method get_core_metric (line 28) | def get_core_metric(cls, accuracy_summary: dict) -> tuple[str, float, ...
FILE: rdagent/app/finetune/llm/ui/components.py
function convert_latex_for_streamlit (line 17) | def convert_latex_for_streamlit(text: str) -> str:
function format_duration (line 32) | def format_duration(seconds: float | None) -> str:
function render_session (line 42) | def render_session(session: Session, show_types: list[str]) -> None:
function render_loop (line 58) | def render_loop(loop: Loop, show_types: list[str]) -> None:
function _render_loop_content (line 124) | def _render_loop_content(loop: Loop, show_types: list[str]) -> None:
function render_evo_loop (line 158) | def render_evo_loop(evo: EvoLoop, show_types: list[str]) -> None:
function render_event (line 170) | def render_event(event: Event) -> None:
function render_scenario (line 208) | def render_scenario(content: Any) -> None:
function render_dataset_selection (line 279) | def render_dataset_selection(content: Any) -> None:
function render_hypothesis (line 296) | def render_hypothesis(content: Any) -> None:
function render_settings (line 306) | def render_settings(content: Any) -> None:
function render_llm_call (line 313) | def render_llm_call(content: Any) -> None:
function render_template (line 354) | def render_template(content: Any) -> None:
function render_experiment (line 378) | def render_experiment(content: Any) -> None:
function render_code (line 392) | def render_code(content: Any) -> None:
function _extract_evaluator_name (line 410) | def _extract_evaluator_name(title: str) -> str:
function _render_single_feedback (line 416) | def _render_single_feedback(fb: Any, evaluator_name: str = "") -> None:
function render_docker_exec (line 461) | def render_docker_exec(content: Any, event_title: str = "") -> None:
function render_feedback (line 530) | def render_feedback(content: Any) -> None:
function render_token (line 572) | def render_token(content: Any) -> None:
function render_time_info (line 585) | def render_time_info(content: Any) -> None:
function render_generic (line 593) | def render_generic(content: Any) -> None:
function render_training_result (line 602) | def render_training_result(result: dict) -> None:
function render_benchmark_result (line 665) | def render_benchmark_result(content: dict) -> None:
function render_summary (line 709) | def render_summary(summary: dict) -> None:
FILE: rdagent/app/finetune/llm/ui/data_loader.py
class Event (line 19) | class Event:
method time_str (line 34) | def time_str(self) -> str:
class EvoLoop (line 39) | class EvoLoop:
class Loop (line 48) | class Loop:
class Session (line 59) | class Session:
function extract_loop_id (line 66) | def extract_loop_id(tag: str) -> int | None:
function extract_evo_id (line 71) | def extract_evo_id(tag: str) -> int | None:
function extract_stage (line 76) | def extract_stage(tag: str) -> str:
function get_valid_sessions (line 88) | def get_valid_sessions(log_folder: Path) -> list[str]:
function parse_event (line 98) | def parse_event(tag: str, content: Any, timestamp: datetime) -> Event | ...
function load_ft_session (line 365) | def load_ft_session(log_path: Path) -> Session:
function get_summary (line 423) | def get_summary(session: Session) -> dict:
FILE: rdagent/app/finetune/llm/ui/ft_summary.py
function is_valid_task (line 16) | def is_valid_task(task_path: Path) -> bool:
function get_loop_dirs (line 21) | def get_loop_dirs(task_path: Path) -> list[Path]:
function extract_benchmark_score (line 27) | def extract_benchmark_score(loop_path: Path, split: str = "") -> tuple[s...
function extract_benchmark_scores (line 60) | def extract_benchmark_scores(loop_path: Path) -> dict[str, tuple[str, fl...
function extract_baseline_score (line 73) | def extract_baseline_score(task_path: Path) -> tuple[str, float] | None:
function extract_baseline_scores (line 101) | def extract_baseline_scores(task_path: Path) -> dict[str, tuple[str, flo...
function get_loop_status (line 144) | def get_loop_status(
function get_max_loops (line 214) | def get_max_loops(job_path: Path) -> int:
function get_job_summary_df (line 224) | def get_job_summary_df(job_path: Path) -> tuple[pd.DataFrame, pd.DataFra...
function style_status_cell (line 318) | def style_status_cell(val: str, decision: bool | None = None) -> str:
function style_df_with_decisions (line 364) | def style_df_with_decisions(df: pd.DataFrame, decisions_df: pd.DataFrame...
function render_job_summary (line 386) | def render_job_summary(job_path: Path, is_root: bool = False) -> None:
function extract_full_benchmark (line 429) | def extract_full_benchmark(loop_path: Path, split: str = "") -> dict | N...
function extract_baseline_full_benchmark (line 457) | def extract_baseline_full_benchmark(task_path: Path, split: str = "valid...
function get_task_full_benchmark_df (line 490) | def get_task_full_benchmark_df(task_path: Path, split: str) -> pd.DataFr...
function render_task_detail_selector (line 554) | def render_task_detail_selector(job_path: Path) -> None:
FILE: rdagent/app/finetune/share/eval.py
class PrevModelLoadEvaluator (line 13) | class PrevModelLoadEvaluator(CoSTEEREvaluator):
method __init__ (line 16) | def __init__(self, scen: Scenario):
method evaluate (line 19) | def evaluate(
FILE: rdagent/app/general_model/general_model.py
function extract_models_and_implement (line 14) | def extract_models_and_implement(report_file_path: str) -> None:
FILE: rdagent/app/kaggle/conf.py
class KaggleBasePropSetting (line 6) | class KaggleBasePropSetting(ExtendedBaseSettings):
FILE: rdagent/app/kaggle/loop.py
class KaggleRDLoop (line 29) | class KaggleRDLoop(RDLoop):
method __init__ (line 30) | def __init__(self, PROP_SETTING: BasePropSetting):
method coding (line 58) | def coding(self, prev_out: dict[str, Any]):
method running (line 71) | def running(self, prev_out: dict[str, Any]):
function main (line 114) | def main(path=None, step_n=None, competition=None):
FILE: rdagent/app/qlib_rd_loop/conf.py
class ModelBasePropSetting (line 8) | class ModelBasePropSetting(BasePropSetting):
class FactorBasePropSetting (line 52) | class FactorBasePropSetting(BasePropSetting):
class FactorFromReportPropSetting (line 96) | class FactorFromReportPropSetting(FactorBasePropSetting):
class QuantBasePropSetting (line 112) | class QuantBasePropSetting(BasePropSetting):
FILE: rdagent/app/qlib_rd_loop/factor.py
class FactorRDLoop (line 17) | class FactorRDLoop(RDLoop):
method running (line 21) | def running(self, prev_out: dict[str, Any]):
function main (line 30) | def main(
FILE: rdagent/app/qlib_rd_loop/factor_from_report.py
function generate_hypothesis (line 26) | def generate_hypothesis(factor_result: dict, report_content: str) -> str:
function extract_hypothesis_and_exp_from_reports (line 61) | def extract_hypothesis_and_exp_from_reports(report_file_path: str) -> Ql...
class FactorReportLoop (line 97) | class FactorReportLoop(FactorRDLoop, metaclass=LoopMeta):
method __init__ (line 98) | def __init__(self, report_folder: str = None):
method direct_exp_gen (line 112) | async def direct_exp_gen(self, prev_out: dict[str, Any]):
method coding (line 137) | def coding(self, prev_out: dict[str, Any]):
function main (line 143) | def main(report_folder=None, path=None, all_duration=None, checkout=True):
FILE: rdagent/app/qlib_rd_loop/model.py
class ModelRDLoop (line 14) | class ModelRDLoop(RDLoop):
function main (line 18) | def main(
FILE: rdagent/app/qlib_rd_loop/quant.py
class QuantRDLoop (line 30) | class QuantRDLoop(RDLoop):
method __init__ (line 36) | def __init__(self, PROP_SETTING: BasePropSetting):
method direct_exp_gen (line 74) | async def direct_exp_gen(self, prev_out: dict[str, Any]):
method coding (line 92) | def coding(self, prev_out: dict[str, Any]):
method running (line 100) | def running(self, prev_out: dict[str, Any]):
method feedback (line 111) | def feedback(self, prev_out: dict[str, Any]):
function main (line 131) | def main(
FILE: rdagent/app/rl/conf.py
class RLPostTrainingPropSetting (line 8) | class RLPostTrainingPropSetting(ExtendedBaseSettings):
FILE: rdagent/app/rl/loop.py
function main (line 16) | def main(
FILE: rdagent/app/rl/ui/app.py
function _safe_resolve (line 23) | def _safe_resolve(user_input: str | None, safe_root: Path) -> Path:
function get_job_options (line 46) | def get_job_options(base_path: Path) -> list[str]:
function main (line 75) | def main():
FILE: rdagent/app/rl/ui/components.py
function format_duration (line 14) | def format_duration(seconds: float | None) -> str:
function render_session (line 24) | def render_session(session: Session, show_types: list[str]) -> None:
function render_loop (line 38) | def render_loop(loop: Loop, show_types: list[str]) -> None:
function render_event (line 95) | def render_event(event: Event) -> None:
function render_scenario (line 127) | def render_scenario(content: Any) -> None:
function render_hypothesis (line 135) | def render_hypothesis(content: Any) -> None:
function render_settings (line 144) | def render_settings(content: Any) -> None:
function render_llm_call (line 151) | def render_llm_call(content: Any) -> None:
function render_template (line 181) | def render_template(content: Any) -> None:
function render_experiment (line 200) | def render_experiment(content: Any) -> None:
function render_code (line 211) | def render_code(content: Any) -> None:
function render_docker_exec (line 228) | def render_docker_exec(content: Any) -> None:
function render_feedback (line 245) | def render_feedback(content: Any) -> None:
function render_token (line 273) | def render_token(content: Any) -> None:
function render_time_info (line 286) | def render_time_info(content: Any) -> None:
function render_generic (line 294) | def render_generic(content: Any) -> None:
function render_summary (line 303) | def render_summary(summary: dict) -> None:
FILE: rdagent/app/rl/ui/data_loader.py
class Event (line 21) | class Event:
method time_str (line 35) | def time_str(self) -> str:
class Loop (line 40) | class Loop:
class Session (line 51) | class Session:
function extract_loop_id (line 58) | def extract_loop_id(tag: str) -> int | None:
function extract_stage (line 63) | def extract_stage(tag: str) -> str:
function get_valid_sessions (line 75) | def get_valid_sessions(log_folder: Path) -> list[str]:
function parse_event (line 85) | def parse_event(tag: str, content: Any, timestamp: datetime) -> Event | ...
function load_session (line 237) | def load_session(log_path: Path) -> Session:
function get_summary (line 285) | def get_summary(session: Session) -> dict:
FILE: rdagent/app/rl/ui/rl_summary.py
function is_valid_task (line 13) | def is_valid_task(task_path: Path) -> bool:
function get_loop_dirs (line 18) | def get_loop_dirs(task_path: Path) -> list[Path]:
function get_loop_status (line 24) | def get_loop_status(task_path: Path, loop_id: int) -> tuple[str, bool | ...
function get_max_loops (line 64) | def get_max_loops(job_path: Path) -> int:
function get_job_summary_df (line 74) | def get_job_summary_df(job_path: Path) -> tuple[pd.DataFrame, pd.DataFra...
function style_status_cell (line 119) | def style_status_cell(val: str, decision: bool | None = None) -> str:
function style_df_with_decisions (line 136) | def style_df_with_decisions(df: pd.DataFrame, decisions_df: pd.DataFrame):
function render_job_summary (line 152) | def render_job_summary(job_path: Path, is_root: bool = False) -> None:
FILE: rdagent/app/utils/ape.py
function get_llm_qa (line 11) | def get_llm_qa(file_path):
FILE: rdagent/app/utils/health_check.py
function check_docker_status (line 14) | def check_docker_status() -> None:
function is_port_in_use (line 32) | def is_port_in_use(port):
function check_and_list_free_ports (line 37) | def check_and_list_free_ports(start_port=19899, max_ports=10) -> None:
function test_chat (line 51) | def test_chat(chat_model, chat_api_key, chat_api_base):
function test_embedding (line 78) | def test_embedding(embedding_model, embedding_api_key, embedding_api_base):
function env_check (line 94) | def env_check():
function health_check (line 135) | def health_check(
FILE: rdagent/app/utils/info.py
function sys_info (line 14) | def sys_info():
function python_info (line 27) | def python_info():
function docker_info (line 34) | def docker_info():
function rdagent_info (line 52) | def rdagent_info():
function collect_info (line 80) | def collect_info():
FILE: rdagent/app/utils/ws.py
function run (line 13) | def run(competition: str, cmd: str, local_path: str = "./", mount_path: ...
FILE: rdagent/app/utils/ws_ft.py
function run (line 13) | def run(
FILE: rdagent/components/agent/base.py
class BaseAgent (line 12) | class BaseAgent:
method __init__ (line 15) | def __init__(self, system_prompt: str, toolsets: list[str]): ...
method query (line 18) | def query(self, query: str) -> str: ...
class PAIAgent (line 21) | class PAIAgent(BaseAgent):
method __init__ (line 29) | def __init__(
method _run_query (line 56) | def _run_query(self, query: str) -> str:
method query (line 64) | def query(self, query: str) -> str:
FILE: rdagent/components/agent/context7/__init__.py
class Agent (line 11) | class Agent(PAIAgent):
method __init__ (line 16) | def __init__(self):
method _build_enhanced_query (line 25) | def _build_enhanced_query(self, error_message: str, full_code: Optiona...
method query (line 46) | def query(self, query: str) -> str:
FILE: rdagent/components/agent/context7/conf.py
class Settings (line 17) | class Settings(BaseSettings):
FILE: rdagent/components/agent/rag/__init__.py
class Agent (line 8) | class Agent(PAIAgent):
method __init__ (line 13) | def __init__(self, system_prompt: str | None = None):
FILE: rdagent/components/agent/rag/conf.py
class Settings (line 10) | class Settings(BaseSettings):
FILE: rdagent/components/benchmark/conf.py
class BenchmarkSettings (line 10) | class BenchmarkSettings(ExtendedBaseSettings):
class Config (line 11) | class Config:
FILE: rdagent/components/benchmark/eval_method.py
class TestCase (line 31) | class TestCase:
method __init__ (line 32) | def __init__(
class TestCases (line 41) | class TestCases:
method __init__ (line 42) | def __init__(self, test_case_l: list[TestCase] = []):
method __getitem__ (line 46) | def __getitem__(self, item):
method __len__ (line 49) | def __len__(self):
method get_exp (line 52) | def get_exp(self):
method target_task (line 56) | def target_task(self):
method ground_truth (line 60) | def ground_truth(self):
class BaseEval (line 64) | class BaseEval:
method __init__ (line 69) | def __init__(
method load_cases_to_eval (line 90) | def load_cases_to_eval(
method eval_case (line 105) | def eval_case(
class FactorImplementEval (line 140) | class FactorImplementEval(BaseEval):
method __init__ (line 141) | def __init__(
method develop (line 160) | def develop(self):
method eval (line 181) | def eval(self, gen_factor_l_all_rounds):
method summarize_res (line 200) | def summarize_res(res: EVAL_RES) -> pd.DataFrame:
FILE: rdagent/components/benchmark/utils.py
function _guess_dataset_var (line 13) | def _guess_dataset_var(mod_path: str) -> str:
function build_dataset_imports_explicit (line 30) | def build_dataset_imports_explicit(dataset_imports: str | Iterable[str])...
FILE: rdagent/components/coder/CoSTEER/__init__.py
class CoSTEER (line 20) | class CoSTEER(Developer[Experiment]):
method __init__ (line 21) | def __init__(
method get_develop_max_seconds (line 69) | def get_develop_max_seconds(self) -> int | None:
method _get_last_fb (line 76) | def _get_last_fb(self) -> CoSTEERMultiFeedback:
method should_use_new_evo (line 82) | def should_use_new_evo(self, base_fb: CoSTEERMultiFeedback | None, new...
method develop (line 93) | def develop(self, exp: Experiment) -> Experiment:
method _exp_postprocess_by_feedback (line 157) | def _exp_postprocess_by_feedback(self, evo: Experiment, feedback: CoST...
FILE: rdagent/components/coder/CoSTEER/config.py
class CoSTEERSettings (line 6) | class CoSTEERSettings(ExtendedBaseSettings):
class Config (line 9) | class Config:
FILE: rdagent/components/coder/CoSTEER/evaluators.py
class CoSTEERSingleFeedback (line 32) | class CoSTEERSingleFeedback(Feedback):
method val_and_update_init_dict (line 54) | def val_and_update_init_dict(data: dict) -> dict:
method merge (line 86) | def merge(cls, feedback_li: list["CoSTEERSingleFeedback"]) -> "CoSTEER...
method __str__ (line 110) | def __str__(self) -> str:
method __bool__ (line 121) | def __bool__(self):
class CoSTEERSingleFeedbackDeprecated (line 125) | class CoSTEERSingleFeedbackDeprecated(CoSTEERSingleFeedback):
method __init__ (line 128) | def __init__(
method execution (line 155) | def execution(self):
method execution (line 159) | def execution(self, value):
method return_checking (line 163) | def return_checking(self):
method return_checking (line 169) | def return_checking(self, value):
method code (line 176) | def code(self):
method code (line 180) | def code(self, value):
method __str__ (line 183) | def __str__(self) -> str:
class CoSTEERMultiFeedback (line 199) | class CoSTEERMultiFeedback(Feedback):
method __init__ (line 202) | def __init__(self, feedback_list: List[CoSTEERSingleFeedback]) -> None:
method __getitem__ (line 205) | def __getitem__(self, index: int) -> CoSTEERSingleFeedback:
method __len__ (line 208) | def __len__(self) -> int:
method append (line 211) | def append(self, feedback: CoSTEERSingleFeedback) -> None:
method __iter__ (line 214) | def __iter__(self):
method is_acceptable (line 217) | def is_acceptable(self) -> bool:
method finished (line 220) | def finished(self) -> bool:
method __bool__ (line 227) | def __bool__(self) -> bool:
class CoSTEEREvaluator (line 231) | class CoSTEEREvaluator(Evaluator):
method __init__ (line 232) | def __init__(
method evaluate (line 243) | def evaluate(
class CoSTEERMultiEvaluator (line 253) | class CoSTEERMultiEvaluator(RAGEvaluator):
method __init__ (line 256) | def __init__(self, single_evaluator: CoSTEEREvaluator | list[CoSTEEREv...
method evaluate_iter (line 261) | def evaluate_iter(
FILE: rdagent/components/coder/CoSTEER/evolvable_subjects.py
class EvolvingItem (line 6) | class EvolvingItem(Experiment, EvolvableSubjects):
method __init__ (line 11) | def __init__(
method from_experiment (line 28) | def from_experiment(cls, exp: Experiment) -> "EvolvingItem":
FILE: rdagent/components/coder/CoSTEER/evolving_strategy.py
class MultiProcessEvolvingStrategy (line 22) | class MultiProcessEvolvingStrategy(EvolvingStrategy):
method __init__ (line 25) | def __init__(self, scen: Scenario, settings: CoSTEERSettings, improve_...
method implement_one_task (line 30) | def implement_one_task(
method implement_func_list (line 61) | def implement_func_list(self) -> list[Callable]:
method assign_code_list_to_evo (line 71) | def assign_code_list_to_evo(self, code_list: list[dict], evo: Evolving...
method assign_code_list_to_evo (line 86) | def assign_code_list_to_evo(self, code_list: list[dict | None], evo) -...
method evolve_iter (line 108) | def evolve_iter(
FILE: rdagent/components/coder/CoSTEER/knowledge_management.py
class CoSTEERKnowledge (line 36) | class CoSTEERKnowledge(Knowledge):
method __init__ (line 37) | def __init__(
method get_implementation_and_feedback_str (line 47) | def get_implementation_and_feedback_str(self) -> str:
class CoSTEERRAGStrategy (line 55) | class CoSTEERRAGStrategy(RAGStrategy):
method __init__ (line 56) | def __init__(self, *args, dump_knowledge_base_path: Path = None, **kwa...
method load_or_init_knowledge_base (line 60) | def load_or_init_knowledge_base(
method dump_knowledge_base (line 82) | def dump_knowledge_base(self):
method load_dumped_knowledge_base (line 91) | def load_dumped_knowledge_base(self, *args, **kwargs):
class CoSTEERQueriedKnowledge (line 102) | class CoSTEERQueriedKnowledge(QueriedKnowledge):
method __init__ (line 135) | def __init__(self, success_task_to_knowledge_dict: dict = {}, failed_t...
class CoSTEERKnowledgeBaseV1 (line 140) | class CoSTEERKnowledgeBaseV1(EvolvingKnowledgeBase):
method __init__ (line 141) | def __init__(self, path: str | Path = None) -> None:
method query (line 148) | def query(self) -> CoSTEERQueriedKnowledge | None:
class CoSTEERQueriedKnowledgeV1 (line 155) | class CoSTEERQueriedKnowledgeV1(CoSTEERQueriedKnowledge):
method __init__ (line 156) | def __init__(
class CoSTEERRAGStrategyV1 (line 168) | class CoSTEERRAGStrategyV1(CoSTEERRAGStrategy):
method __init__ (line 171) | def __init__(self, settings: CoSTEERSettings, *args, **kwargs) -> None:
method generate_knowledge (line 176) | def generate_knowledge(
method query (line 219) | def query(
class CoSTEERQueriedKnowledgeV2 (line 281) | class CoSTEERQueriedKnowledgeV2(CoSTEERQueriedKnowledgeV1):
method __init__ (line 339) | def __init__(
class CoSTEERRAGStrategyV2 (line 354) | class CoSTEERRAGStrategyV2(CoSTEERRAGStrategy):
method __init__ (line 355) | def __init__(self, settings: CoSTEERSettings, *args, **kwargs) -> None:
method generate_knowledge (line 360) | def generate_knowledge(
method query (line 431) | def query(self, evo: EvolvableSubjects, evolving_trace: list[EvoStep])...
method analyze_component (line 457) | def analyze_component(
method analyze_error (line 488) | def analyze_error(
method former_trace_query (line 530) | def former_trace_query(
method component_query (line 593) | def component_query(
method error_query (line 723) | def error_query(
class CoSTEERKnowledgeBaseV2 (line 852) | class CoSTEERKnowledgeBaseV2(EvolvingKnowledgeBase):
method __init__ (line 853) | def __init__(self, init_component_list=None, path: str | Path = None) ...
method get_all_nodes_by_label (line 881) | def get_all_nodes_by_label(self, label: str) -> list[UndirectedNode]:
method update_success_task (line 884) | def update_success_task(
method query (line 926) | def query(self):
method graph_get_node_by_content (line 929) | def graph_get_node_by_content(self, content: str) -> UndirectedNode:
method graph_query_by_content (line 932) | def graph_query_by_content(
method graph_query_by_node (line 974) | def graph_query_by_node(
method graph_query_by_intersection (line 1009) | def graph_query_by_intersection(
FILE: rdagent/components/coder/CoSTEER/task.py
class CoSTEERTask (line 4) | class CoSTEERTask(Task):
method __init__ (line 5) | def __init__(self, base_code: str = None, *args, **kwargs) -> None:
FILE: rdagent/components/coder/data_science/conf.py
class DSCoderCoSTEERSettings (line 16) | class DSCoderCoSTEERSettings(CoSTEERSettings):
class Config (line 19) | class Config:
function get_ds_env (line 41) | def get_ds_env(
function get_clear_ws_cmd (line 78) | def get_clear_ws_cmd(stage: Literal["before_training", "before_inference...
FILE: rdagent/components/coder/data_science/ensemble/__init__.py
class EnsembleMultiProcessEvolvingStrategy (line 43) | class EnsembleMultiProcessEvolvingStrategy(MultiProcessEvolvingStrategy):
method implement_one_task (line 44) | def implement_one_task(
method assign_code_list_to_evo (line 127) | def assign_code_list_to_evo(self, code_list: list[dict[str, str]], evo):
class EnsembleCoSTEER (line 144) | class EnsembleCoSTEER(DSCoSTEER):
method __init__ (line 145) | def __init__(
FILE: rdagent/components/coder/data_science/ensemble/eval.py
class EnsembleCoSTEEREvaluator (line 24) | class EnsembleCoSTEEREvaluator(CoSTEEREvaluator):
method evaluate (line 25) | def evaluate(
FILE: rdagent/components/coder/data_science/ensemble/exp.py
class EnsembleTask (line 12) | class EnsembleTask(CoSTEERTask):
FILE: rdagent/components/coder/data_science/ensemble/test.py
function load_ensemble_spec (line 26) | def load_ensemble_spec():
function develop_one_competition (line 32) | def develop_one_competition(competition: str):
FILE: rdagent/components/coder/data_science/feature/__init__.py
class FeatureMultiProcessEvolvingStrategy (line 28) | class FeatureMultiProcessEvolvingStrategy(MultiProcessEvolvingStrategy):
method implement_one_task (line 29) | def implement_one_task(
method assign_code_list_to_evo (line 101) | def assign_code_list_to_evo(self, code_list: list[dict[str, str]], evo):
class FeatureCoSTEER (line 118) | class FeatureCoSTEER(DSCoSTEER):
method __init__ (line 119) | def __init__(
FILE: rdagent/components/coder/data_science/feature/eval.py
class FeatureCoSTEEREvaluator (line 19) | class FeatureCoSTEEREvaluator(CoSTEEREvaluator):
method evaluate (line 20) | def evaluate(
FILE: rdagent/components/coder/data_science/feature/exp.py
class FeatureTask (line 12) | class FeatureTask(CoSTEERTask):
FILE: rdagent/components/coder/data_science/feature/test.py
function develop_one_competition (line 15) | def develop_one_competition(competition: str): # -> experiment
FILE: rdagent/components/coder/data_science/model/__init__.py
class ModelMultiProcessEvolvingStrategy (line 30) | class ModelMultiProcessEvolvingStrategy(MultiProcessEvolvingStrategy):
method implement_one_task (line 31) | def implement_one_task(
method assign_code_list_to_evo (line 133) | def assign_code_list_to_evo(self, code_list: list[dict[str, str]], evo):
class ModelCoSTEER (line 150) | class ModelCoSTEER(DSCoSTEER):
method __init__ (line 151) | def __init__(
FILE: rdagent/components/coder/data_science/model/eval.py
class ModelGeneralCaseSpecEvaluator (line 29) | class ModelGeneralCaseSpecEvaluator(CoSTEEREvaluator):
method evaluate (line 38) | def evaluate(
FILE: rdagent/components/coder/data_science/model/exp.py
class ModelTask (line 7) | class ModelTask(CoSTEERTask):
method __init__ (line 8) | def __init__(
method get_task_information (line 17) | def get_task_information(self):
FILE: rdagent/components/coder/data_science/model/test.py
function develop_one_competition (line 19) | def develop_one_competition(competition: str):
FILE: rdagent/components/coder/data_science/pipeline/__init__.py
class PipelineMultiProcessEvolvingStrategy (line 54) | class PipelineMultiProcessEvolvingStrategy(MultiProcessEvolvingStrategy):
method implement_one_task (line 55) | def implement_one_task(
method assign_code_list_to_evo (line 116) | def assign_code_list_to_evo(self, code_list: list[dict[str, str]], evo):
class PipelineCoSTEER (line 133) | class PipelineCoSTEER(DSCoSTEER):
method __init__ (line 134) | def __init__(
FILE: rdagent/components/coder/data_science/pipeline/eval.py
class DSCoderFeedback (line 33) | class DSCoderFeedback(CoSTEERSingleFeedback):
method val_and_update_init_dict (line 43) | def val_and_update_init_dict(data: dict) -> dict:
method __str__ (line 71) | def __str__(self) -> str:
method merge (line 87) | def merge(cls, feedback_li: list[CoSTEERSingleFeedback]) -> "DSCoderFe...
class PipelineCoSTEEREvaluator (line 123) | class PipelineCoSTEEREvaluator(CoSTEEREvaluator):
method evaluate (line 125) | def evaluate(
FILE: rdagent/components/coder/data_science/pipeline/exp.py
class PipelineTask (line 5) | class PipelineTask(CoSTEERTask):
method __init__ (line 6) | def __init__(self, name: str = "Pipeline", package_info: str | None = ...
FILE: rdagent/components/coder/data_science/raw_data_loader/__init__.py
class DataLoaderMultiProcessEvolvingStrategy (line 58) | class DataLoaderMultiProcessEvolvingStrategy(MultiProcessEvolvingStrategy):
method implement_one_task (line 59) | def implement_one_task(
method assign_code_list_to_evo (line 181) | def assign_code_list_to_evo(self, code_list: list[dict[str, str]], evo):
class DataLoaderCoSTEER (line 198) | class DataLoaderCoSTEER(DSCoSTEER):
method __init__ (line 199) | def __init__(
method develop (line 222) | def develop(self, exp):
FILE: rdagent/components/coder/data_science/raw_data_loader/eval.py
class DataLoaderCoSTEEREvaluator (line 26) | class DataLoaderCoSTEEREvaluator(CoSTEEREvaluator):
method evaluate (line 27) | def evaluate(
FILE: rdagent/components/coder/data_science/raw_data_loader/exp.py
class DataLoaderTask (line 5) | class DataLoaderTask(CoSTEERTask):
FILE: rdagent/components/coder/data_science/raw_data_loader/test.py
function develop_one_competition (line 15) | def develop_one_competition(competition: str): # -> experiment
FILE: rdagent/components/coder/data_science/share/doc.py
class DocDev (line 12) | class DocDev(Developer[Experiment]):
method develop (line 17) | def develop(self, exp: Experiment) -> None:
FILE: rdagent/components/coder/data_science/share/ds_costeer.py
class DSCoSTEER (line 4) | class DSCoSTEER(CoSTEER):
method get_develop_max_seconds (line 5) | def get_develop_max_seconds(self) -> int | None:
FILE: rdagent/components/coder/data_science/share/eval.py
class ModelDumpEvaluator (line 29) | class ModelDumpEvaluator(CoSTEEREvaluator):
method __init__ (line 32) | def __init__(self, scen: Scenario, data_type: Literal["sample", "full"]):
method evaluate (line 36) | def evaluate(
FILE: rdagent/components/coder/data_science/share/notebook.py
class NotebookConverter (line 22) | class NotebookConverter:
method validate_code_format (line 27) | def validate_code_format(self, code: str) -> str | None:
method convert (line 41) | def convert(
FILE: rdagent/components/coder/data_science/share/util.py
class CodeSection (line 9) | class CodeSection(TypedDict):
function extract_function_body (line 20) | def extract_function_body(source_code: str, function_name: str) -> Optio...
function split_sections (line 39) | def split_sections(
function split_code_sections (line 79) | def split_code_sections(source_code: str) -> tuple[Optional[str], list[s...
function split_output_sections (line 86) | def split_output_sections(stdout: str, known_sections: list[str]) -> tup...
function extract_comment_under_first_print (line 94) | def extract_comment_under_first_print(source_code) -> tuple[Optional[str...
function extract_first_section_name_from_code (line 133) | def extract_first_section_name_from_code(source_code):
function extract_first_section_name_from_output (line 151) | def extract_first_section_name_from_output(stdout: str) -> Optional[str]:
function is_function_called (line 161) | def is_function_called(source_code: str, func_name: str) -> bool:
function remove_function (line 178) | def remove_function(source_code: str, function_name: str) -> str:
function remove_main_block (line 194) | def remove_main_block(source_code: str) -> str:
function extract_top_level_functions_with_decorators_and_comments (line 224) | def extract_top_level_functions_with_decorators_and_comments(
function split_code_and_output_into_sections (line 300) | def split_code_and_output_into_sections(code: str, stdout: str) -> list[...
FILE: rdagent/components/coder/data_science/utils.py
function remove_eda_part (line 4) | def remove_eda_part(stdout: str) -> str:
FILE: rdagent/components/coder/data_science/workflow/__init__.py
class WorkflowMultiProcessEvolvingStrategy (line 26) | class WorkflowMultiProcessEvolvingStrategy(MultiProcessEvolvingStrategy):
method implement_one_task (line 27) | def implement_one_task(
method assign_code_list_to_evo (line 94) | def assign_code_list_to_evo(self, code_list: list[dict[str, str]], evo):
class WorkflowCoSTEER (line 111) | class WorkflowCoSTEER(DSCoSTEER):
method __init__ (line 112) | def __init__(
FILE: rdagent/components/coder/data_science/workflow/eval.py
class WorkflowGeneralCaseSpecEvaluator (line 27) | class WorkflowGeneralCaseSpecEvaluator(CoSTEEREvaluator):
method evaluate (line 36) | def evaluate(
FILE: rdagent/components/coder/data_science/workflow/exp.py
class WorkflowTask (line 12) | class WorkflowTask(CoSTEERTask):
method __init__ (line 13) | def __init__(self, name: str = "Workflow", *args, **kwargs) -> None:
FILE: rdagent/components/coder/data_science/workflow/test.py
function develop_one_competition (line 18) | def develop_one_competition(competition: str):
FILE: rdagent/components/coder/factor_coder/__init__.py
class FactorCoSTEER (line 12) | class FactorCoSTEER(CoSTEER):
method __init__ (line 13) | def __init__(
method develop (line 25) | def develop(self, exp: Experiment) -> Experiment:
FILE: rdagent/components/coder/factor_coder/config.py
class FactorCoSTEERSettings (line 10) | class FactorCoSTEERSettings(CoSTEERSettings):
function get_factor_env (line 32) | def get_factor_env(
FILE: rdagent/components/coder/factor_coder/eva_utils.py
class FactorEvaluator (line 16) | class FactorEvaluator:
method __init__ (line 19) | def __init__(self, scen=None) -> None:
method evaluate (line 23) | def evaluate(
method _get_df (line 46) | def _get_df(self, gt_implementation: Workspace, implementation: Worksp...
method __str__ (line 63) | def __str__(self) -> str:
class FactorCodeEvaluator (line 67) | class FactorCodeEvaluator(FactorEvaluator):
method evaluate (line 68) | def evaluate(
class FactorInfEvaluator (line 120) | class FactorInfEvaluator(FactorEvaluator):
method evaluate (line 121) | def evaluate(
class FactorSingleColumnEvaluator (line 142) | class FactorSingleColumnEvaluator(FactorEvaluator):
method evaluate (line 143) | def evaluate(
class FactorOutputFormatEvaluator (line 163) | class FactorOutputFormatEvaluator(FactorEvaluator):
method evaluate (line 164) | def evaluate(
class FactorDatetimeDailyEvaluator (line 217) | class FactorDatetimeDailyEvaluator(FactorEvaluator):
method evaluate (line 218) | def evaluate(
class FactorRowCountEvaluator (line 247) | class FactorRowCountEvaluator(FactorEvaluator):
method evaluate (line 248) | def evaluate(
class FactorIndexEvaluator (line 271) | class FactorIndexEvaluator(FactorEvaluator):
method evaluate (line 272) | def evaluate(
class FactorMissingValuesEvaluator (line 296) | class FactorMissingValuesEvaluator(FactorEvaluator):
method evaluate (line 297) | def evaluate(
class FactorEqualValueRatioEvaluator (line 317) | class FactorEqualValueRatioEvaluator(FactorEvaluator):
method evaluate (line 318) | def evaluate(
class FactorCorrelationEvaluator (line 348) | class FactorCorrelationEvaluator(FactorEvaluator):
method __init__ (line 349) | def __init__(self, hard_check: bool, *args, **kwargs) -> None:
method evaluate (line 353) | def evaluate(
class FactorValueEvaluator (line 389) | class FactorValueEvaluator(FactorEvaluator):
method evaluate (line 390) | def evaluate(
class FactorFinalDecisionEvaluator (line 478) | class FactorFinalDecisionEvaluator(FactorEvaluator):
method evaluate (line 479) | def evaluate(
FILE: rdagent/components/coder/factor_coder/evaluators.py
class FactorEvaluatorForCoder (line 20) | class FactorEvaluatorForCoder(CoSTEEREvaluator):
method __init__ (line 25) | def __init__(self, *args, **kwargs) -> None:
method evaluate (line 31) | def evaluate(
function shorten_prompt (line 124) | def shorten_prompt(tpl: str, render_kwargs: dict, shorten_key: str, max_...
FILE: rdagent/components/coder/factor_coder/evolving_strategy.py
class FactorMultiProcessEvolvingStrategy (line 23) | class FactorMultiProcessEvolvingStrategy(MultiProcessEvolvingStrategy):
method __init__ (line 24) | def __init__(self, *args, **kwargs) -> None:
method error_summary (line 29) | def error_summary(
method implement_one_task (line 60) | def implement_one_task(
method assign_code_list_to_evo (line 166) | def assign_code_list_to_evo(self, code_list, evo):
FILE: rdagent/components/coder/factor_coder/factor.py
class FactorTask (line 20) | class FactorTask(CoSTEERTask):
method __init__ (line 23) | def __init__(
method factor_description (line 44) | def factor_description(self):
method get_task_information (line 48) | def get_task_information(self):
method get_task_brief_information (line 54) | def get_task_brief_information(self):
method get_task_information_and_implementation_result (line 60) | def get_task_information_and_implementation_result(self):
method from_dict (line 70) | def from_dict(dict):
method __repr__ (line 73) | def __repr__(self) -> str:
class FactorFBWorkspace (line 77) | class FactorFBWorkspace(FBWorkspace):
method __init__ (line 90) | def __init__(
method hash_func (line 99) | def hash_func(self, data_type: str = "Debug") -> str:
method execute (line 107) | def execute(self, data_type: str = "Debug") -> Tuple[str, pd.DataFrame]:
method __str__ (line 212) | def __str__(self) -> str:
method __repr__ (line 217) | def __repr__(self) -> str:
method from_folder (line 221) | def from_folder(task: FactorTask, path: Union[str, Path], **kwargs):
FILE: rdagent/components/coder/finetune/__init__.py
class LLMFinetuneEvolvingStrategy (line 45) | class LLMFinetuneEvolvingStrategy(MultiProcessEvolvingStrategy):
method __init__ (line 48) | def __init__(self, scen: Scenario, settings, *args, **kwargs):
method implement_func_list (line 52) | def implement_func_list(self) -> list[Callable]:
method implement_data (line 55) | def implement_data(
method _get_dataset_info (line 153) | def _get_dataset_info(self, involving_datasets: list[str], datasets_pa...
method implement_lf_config (line 245) | def implement_lf_config(
method _generate_llamafactory_config_with_llm (line 288) | def _generate_llamafactory_config_with_llm(
class LLMFinetuneCoSTEER (line 368) | class LLMFinetuneCoSTEER(CoSTEER):
method __init__ (line 371) | def __init__(
FILE: rdagent/components/coder/finetune/conf.py
function is_docker_env (line 27) | def is_docker_env(env: Env) -> bool:
function get_workspace_prefix (line 32) | def get_workspace_prefix(env: Env) -> str:
function get_data_processing_cache_key (line 52) | def get_data_processing_cache_key(local_path: str | Path) -> list[list[s...
class FTPathConfig (line 70) | class FTPathConfig:
method is_docker (line 85) | def is_docker(self) -> bool:
method models (line 92) | def models(self) -> str:
method datasets (line 99) | def datasets(self) -> str:
method workspace (line 106) | def workspace(self) -> str:
method deepspeed (line 111) | def deepspeed(self) -> str:
class FTCoderCoSTEERSettings (line 126) | class FTCoderCoSTEERSettings(CoSTEERSettings):
class Config (line 129) | class Config:
function _get_standard_ft_volumes (line 142) | def _get_standard_ft_volumes() -> dict:
function get_ft_env (line 168) | def get_ft_env(
function get_data_processing_env (line 231) | def get_data_processing_env(
function clear_workspace (line 273) | def clear_workspace(workspace: FBWorkspace, env: Env) -> None:
function get_benchmark_env (line 308) | def get_benchmark_env(
function inject_data_stats (line 368) | def inject_data_stats(implementation: FBWorkspace, data: list, stdout: s...
FILE: rdagent/components/coder/finetune/eval.py
class FTDataEvaluator (line 42) | class FTDataEvaluator(CoSTEEREvaluator):
method evaluate (line 51) | def evaluate(
method _generate_llm_feedback (line 134) | def _generate_llm_feedback(
method _validate_data_json (line 207) | def _validate_data_json(self, data_json_path: Path) -> dict:
method _update_dataset_info (line 246) | def _update_dataset_info(self, implementation: FBWorkspace, sample_cou...
method _sample_data (line 271) | def _sample_data(self, data: list, n: int = 5) -> list:
method _analyze_data_quality (line 277) | def _analyze_data_quality(self, data: list) -> dict:
class FTCoderEvaluator (line 307) | class FTCoderEvaluator(CoSTEEREvaluator):
method __init__ (line 310) | def __init__(self, *args, **kwargs):
method evaluate (line 313) | def evaluate(
FILE: rdagent/components/coder/finetune/exp.py
class FTTask (line 12) | class FTTask(CoSTEERTask):
method __init__ (line 15) | def __init__(
method get_task_information (line 31) | def get_task_information(self) -> str:
FILE: rdagent/components/coder/finetune/unified_validator.py
class ValidationResult (line 43) | class ValidationResult:
class LLMConfigValidator (line 54) | class LLMConfigValidator:
method __init__ (line 63) | def __init__(self):
method validate_and_test (line 66) | def validate_and_test(self, config_yaml: str, workspace: FBWorkspace, ...
method _filter_parameters (line 89) | def _filter_parameters(self, config_yaml: str) -> tuple[str, List[str]]:
method _inject_required_parameters (line 114) | def _inject_required_parameters(self, config_yaml: str) -> str:
method _get_supported_parameters (line 128) | def _get_supported_parameters(self) -> Set[str]:
method _parse_execution_log (line 158) | def _parse_execution_log(self, stdout: str, exit_code: int, failed_sta...
method _run_micro_batch_test (line 256) | def _run_micro_batch_test(self, config_yaml: str, workspace: FBWorkspa...
FILE: rdagent/components/coder/model_coder/__init__.py
class ModelCoSTEER (line 11) | class ModelCoSTEER(CoSTEER):
method __init__ (line 12) | def __init__(
FILE: rdagent/components/coder/model_coder/benchmark/eval.py
function get_data_conf (line 7) | def get_data_conf(init_val):
class ModelImpValEval (line 17) | class ModelImpValEval:
method evaluate (line 35) | def evaluate(self, gt: ModelFBWorkspace, gen: ModelFBWorkspace):
FILE: rdagent/components/coder/model_coder/benchmark/gt_code/A-DGN.py
class AntiSymmetricConv (line 13) | class AntiSymmetricConv(torch.nn.Module):
method __init__ (line 54) | def __init__(
method reset_parameters (line 87) | def reset_parameters(self):
method forward (line 93) | def forward(self, x: Tensor, edge_index: Adj, *args, **kwargs) -> Tensor:
method __repr__ (line 111) | def __repr__(self) -> str:
FILE: rdagent/components/coder/model_coder/benchmark/gt_code/dirgnn.py
class DirGNNConv (line 8) | class DirGNNConv(torch.nn.Module):
method __init__ (line 26) | def __init__(
method reset_parameters (line 54) | def reset_parameters(self):
method forward (line 61) | def forward(self, x: Tensor, edge_index: Tensor) -> Tensor:
method __repr__ (line 73) | def __repr__(self) -> str:
FILE: rdagent/components/coder/model_coder/benchmark/gt_code/gpsconv.py
class GPSConv (line 16) | class GPSConv(torch.nn.Module):
method __init__ (line 59) | def __init__(
method reset_parameters (line 116) | def reset_parameters(self):
method forward (line 129) | def forward(
method __repr__ (line 178) | def __repr__(self) -> str:
FILE: rdagent/components/coder/model_coder/benchmark/gt_code/linkx.py
class SparseLinear (line 13) | class SparseLinear(MessagePassing):
method __init__ (line 14) | def __init__(self, in_channels: int, out_channels: int, bias: bool = T...
method reset_parameters (line 27) | def reset_parameters(self):
method forward (line 31) | def forward(
method message (line 44) | def message(self, weight_j: Tensor, edge_weight: OptTensor) -> Tensor:
method message_and_aggregate (line 50) | def message_and_aggregate(self, adj_t: Adj, weight: Tensor) -> Tensor:
class LINKX (line 54) | class LINKX(torch.nn.Module):
method __init__ (line 88) | def __init__(
method reset_parameters (line 127) | def reset_parameters(self):
method forward (line 139) | def forward(
method __repr__ (line 162) | def __repr__(self) -> str:
FILE: rdagent/components/coder/model_coder/benchmark/gt_code/pmlp.py
class PMLP (line 10) | class PMLP(torch.nn.Module):
method __init__ (line 30) | def __init__(
method reset_parameters (line 68) | def reset_parameters(self):
method forward (line 75) | def forward(
method __repr__ (line 98) | def __repr__(self) -> str:
FILE: rdagent/components/coder/model_coder/benchmark/gt_code/visnet.py
class CosineCutoff (line 12) | class CosineCutoff(torch.nn.Module):
method __init__ (line 28) | def __init__(self, cutoff: float) -> None:
method forward (line 32) | def forward(self, distances: Tensor) -> Tensor:
class ExpNormalSmearing (line 48) | class ExpNormalSmearing(torch.nn.Module):
method __init__ (line 64) | def __init__(
method _initial_params (line 86) | def _initial_params(self) -> Tuple[Tensor, Tensor]:
method reset_parameters (line 93) | def reset_parameters(self):
method forward (line 99) | def forward(self, dist: Tensor) -> Tensor:
class Sphere (line 110) | class Sphere(torch.nn.Module):
method __init__ (line 124) | def __init__(self, lmax: int = 2) -> None:
method forward (line 128) | def forward(self, edge_vec: Tensor) -> Tensor:
method _spherical_harmonics (line 142) | def _spherical_harmonics(
class VecLayerNorm (line 188) | class VecLayerNorm(torch.nn.Module):
method __init__ (line 203) | def __init__(
method reset_parameters (line 223) | def reset_parameters(self):
method max_min_norm (line 227) | def max_min_norm(self, vec: Tensor) -> Tensor:
method forward (line 258) | def forward(self, vec: Tensor) -> Tensor:
class Distance (line 279) | class Distance(torch.nn.Module):
method __init__ (line 296) | def __init__(
method forward (line 307) | def forward(
class NeighborEmbedding (line 344) | class NeighborEmbedding(MessagePassing):
method __init__ (line 358) | def __init__(
method reset_parameters (line 373) | def reset_parameters(self):
method forward (line 381) | def forward(
method message (line 416) | def message(self, x_j: Tensor, W: Tensor) -> Tensor:
class EdgeEmbedding (line 420) | class EdgeEmbedding(torch.nn.Module):
method __init__ (line 431) | def __init__(self, num_rbf: int, hidden_channels: int) -> None:
method reset_parameters (line 436) | def reset_parameters(self):
method forward (line 441) | def forward(
class ViS_MP (line 462) | class ViS_MP(MessagePassing):
method __init__ (line 482) | def __init__(
method vector_rejection (line 536) | def vector_rejection(vec: Tensor, d_ij: Tensor) -> Tensor:
method reset_parameters (line 546) | def reset_parameters(self):
method forward (line 573) | def forward(
method message (line 624) | def message(
method edge_update (line 638) | def edge_update(self, vec_i: Tensor, vec_j: Tensor, d_ij: Tensor, f_ij...
method aggregate (line 645) | def aggregate(
class ViS_MP_Vertex (line 658) | class ViS_MP_Vertex(ViS_MP):
method __init__ (line 678) | def __init__(
method reset_parameters (line 696) | def reset_parameters(self):
method edge_update (line 706) | def edge_update(self, vec_i: Tensor, vec_j: Tensor, d_ij: Tensor, f_ij...
class ViSNetBlock (line 720) | class ViSNetBlock(torch.nn.Module):
method __init__ (line 752) | def __init__(
method reset_parameters (line 811) | def reset_parameters(self):
method forward (line 822) | def forward(
class GatedEquivariantBlock (line 866) | class GatedEquivariantBlock(torch.nn.Module):
method __init__ (line 884) | def __init__(
method reset_parameters (line 910) | def reset_parameters(self):
method forward (line 919) | def forward(self, x: Tensor, v: Tensor) -> Tuple[Tensor, Tensor]:
class EquivariantScalar (line 940) | class EquivariantScalar(torch.nn.Module):
method __init__ (line 949) | def __init__(self, hidden_channels: int) -> None:
method reset_parameters (line 969) | def reset_parameters(self):
method pre_reduce (line 974) | def pre_reduce(self, x: Tensor, v: Tensor) -> Tensor:
class Atomref (line 990) | class Atomref(torch.nn.Module):
method __init__ (line 1000) | def __init__(
method reset_parameters (line 1020) | def reset_parameters(self):
method forward (line 1024) | def forward(self, x: Tensor, z: Tensor) -> Tensor:
class ViSNet (line 1034) | class ViSNet(torch.nn.Module):
method __init__ (line 1077) | def __init__(
method reset_parameters (line 1124) | def reset_parameters(self):
method forward (line 1131) | def forward(
FILE: rdagent/components/coder/model_coder/conf.py
class ModelCoSTEERSettings (line 9) | class ModelCoSTEERSettings(CoSTEERSettings):
function get_model_env (line 16) | def get_model_env(
FILE: rdagent/components/coder/model_coder/eva_utils.py
function shape_evaluator (line 15) | def shape_evaluator(prediction: np.ndarray, target_shape: Tuple = None) ...
function value_evaluator (line 32) | def value_evaluator(
class ModelCodeEvaluator (line 52) | class ModelCodeEvaluator(CoSTEEREvaluator):
method evaluate (line 53) | def evaluate(
class ModelFinalEvaluator (line 105) | class ModelFinalEvaluator(CoSTEEREvaluator):
method evaluate (line 106) | def evaluate(
FILE: rdagent/components/coder/model_coder/evaluators.py
class ModelCoSTEEREvaluator (line 20) | class ModelCoSTEEREvaluator(CoSTEEREvaluator):
method evaluate (line 21) | def evaluate(
FILE: rdagent/components/coder/model_coder/evolving_strategy.py
class ModelMultiProcessEvolvingStrategy (line 20) | class ModelMultiProcessEvolvingStrategy(MultiProcessEvolvingStrategy):
method implement_one_task (line 21) | def implement_one_task(
method assign_code_list_to_evo (line 82) | def assign_code_list_to_evo(self, code_list, evo):
FILE: rdagent/components/coder/model_coder/gt_code.py
class AntiSymmetricConv (line 18) | class AntiSymmetricConv(torch.nn.Module):
method __init__ (line 59) | def __init__(
method reset_parameters (line 92) | def reset_parameters(self):
method forward (line 98) | def forward(self, x: Tensor, edge_index: Adj, *args, **kwargs) -> Tensor:
method __repr__ (line 116) | def __repr__(self) -> str:
FILE: rdagent/components/coder/model_coder/model.py
class ModelTask (line 15) | class ModelTask(CoSTEERTask):
method __init__ (line 16) | def __init__(
method get_task_information (line 39) | def get_task_information(self):
method get_task_brief_information (line 51) | def get_task_brief_information(self):
method from_dict (line 62) | def from_dict(dict):
method __repr__ (line 65) | def __repr__(self) -> str:
class ModelFBWorkspace (line 69) | class ModelFBWorkspace(FBWorkspace):
method hash_func (line 90) | def hash_func(
method execute (line 105) | def execute(
FILE: rdagent/components/coder/model_coder/one_shot/__init__.py
class ModelCodeWriter (line 12) | class ModelCodeWriter(Developer[ModelExperiment]):
method develop (line 13) | def develop(self, exp: ModelExperiment) -> ModelExperiment:
FILE: rdagent/components/coder/model_coder/task_loader.py
function extract_model_from_doc (line 20) | def extract_model_from_doc(doc_content: str) -> dict:
function merge_file_to_model_dict_to_model_dict (line 71) | def merge_file_to_model_dict_to_model_dict(
function extract_model_from_docs (line 92) | def extract_model_from_docs(docs_dict):
class ModelExperimentLoaderFromDict (line 99) | class ModelExperimentLoaderFromDict(ModelTaskLoader):
method load (line 100) | def load(self, model_dict: dict) -> QlibModelExperiment:
class ModelExperimentLoaderFromPDFfiles (line 118) | class ModelExperimentLoaderFromPDFfiles(ModelTaskLoader):
method load (line 120) | def load(self, file_or_folder_path: str) -> QlibModelExperiment:
FILE: rdagent/components/coder/rl/costeer.py
class RLCoderCoSTEERSettings (line 23) | class RLCoderCoSTEERSettings(CoSTEERSettings):
class RLEvolvingStrategy (line 29) | class RLEvolvingStrategy(EvolvingStrategy):
method __init__ (line 32) | def __init__(self, scen: Scenario, settings: CoSTEERSettings):
method evolve_iter (line 36) | def evolve_iter(
method _generate_code (line 54) | def _generate_code(self, task: Task, evolving_trace: list[EvoStep] = [...
method _mock_code (line 85) | def _mock_code(self) -> dict[str, str]:
class RLCoderEvaluator (line 98) | class RLCoderEvaluator:
method __init__ (line 101) | def __init__(self, scen: Scenario) -> None:
method evaluate (line 104) | def evaluate(
class RLCoSTEER (line 121) | class RLCoSTEER(CoSTEER):
method __init__ (line 124) | def __init__(self, scen: Scenario, *args, **kwargs) -> None:
FILE: rdagent/components/document_reader/document_reader.py
function load_documents_by_langchain (line 20) | def load_documents_by_langchain(path: str) -> list:
function process_documents_by_langchain (line 36) | def process_documents_by_langchain(docs: list[Document]) -> dict[str, str]:
function load_and_process_pdfs_by_langchain (line 63) | def load_and_process_pdfs_by_langchain(path: str) -> dict[str, str]:
function load_and_process_one_pdf_by_azure_document_intelligence (line 67) | def load_and_process_one_pdf_by_azure_document_intelligence(
function load_and_process_pdfs_by_azure_document_intelligence (line 87) | def load_and_process_pdfs_by_azure_document_intelligence(path: Path) -> ...
function extract_first_page_screenshot_from_pdf (line 112) | def extract_first_page_screenshot_from_pdf(pdf_path: str) -> Image:
FILE: rdagent/components/interactor/__init__.py
class SkipInteractor (line 6) | class SkipInteractor(Interactor[ASpecificExp]):
method interact (line 8) | def interact(self, exp: ASpecificExp, trace: Trace) -> ASpecificExp:
FILE: rdagent/components/knowledge_management/graph.py
class UndirectedNode (line 22) | class UndirectedNode(Node):
method __init__ (line 23) | def __init__(self, content: str = "", label: str = "", embedding: Any ...
method add_neighbor (line 29) | def add_neighbor(self, node: UndirectedNode) -> None:
method remove_neighbor (line 33) | def remove_neighbor(self, node: UndirectedNode) -> None:
method get_neighbors (line 38) | def get_neighbors(self) -> set[UndirectedNode]:
method __str__ (line 41) | def __str__(self) -> str:
method __repr__ (line 47) | def __repr__(self) -> str:
class Graph (line 54) | class Graph(KnowledgeBase):
method __init__ (line 59) | def __init__(self, path: str | Path | None = None) -> None:
method size (line 63) | def size(self) -> int:
method get_node (line 66) | def get_node(self, node_id: str) -> Node | None:
method add_node (line 69) | def add_node(self, **kwargs: Any) -> NoReturn:
method get_all_nodes (line 72) | def get_all_nodes(self) -> list[Node]:
method get_all_nodes_by_label_list (line 75) | def get_all_nodes_by_label_list(self, label_list: list[str]) -> list[N...
method find_node (line 78) | def find_node(self, content: str, label: str) -> Node | None:
method batch_embedding (line 85) | def batch_embedding(nodes: list[Node]) -> list[Node]:
method __str__ (line 104) | def __str__(self) -> str:
class UndirectedGraph (line 108) | class UndirectedGraph(Graph):
method __init__ (line 113) | def __init__(self, path: str | Path | None = None) -> None:
method __str__ (line 117) | def __str__(self) -> str:
method add_node (line 120) | def add_node(
method add_nodes (line 169) | def add_nodes(self, node: UndirectedNode, neighbors: list[UndirectedNo...
method get_node (line 176) | def get_node(self, node_id: str) -> UndirectedNode:
method get_node_by_content (line 179) | def get_node_by_content(self, content: str) -> UndirectedNode | None:
method get_nodes_within_steps (line 197) | def get_nodes_within_steps(
method get_nodes_intersection (line 235) | def get_nodes_intersection(
method semantic_search (line 275) | def semantic_search(
method clear (line 314) | def clear(self) -> None:
method query_by_node (line 318) | def query_by_node(
method query_by_content (line 356) | def query_by_content(
method intersection (line 430) | def intersection(nodes1: list[UndirectedNode], nodes2: list[Undirected...
method different (line 434) | def different(nodes1: list[UndirectedNode], nodes2: list[UndirectedNod...
method cal_distance (line 438) | def cal_distance(node1: UndirectedNode, node2: UndirectedNode) -> float:
method filter_label (line 442) | def filter_label(nodes: list[UndirectedNode], labels: list[str]) -> li...
function graph_to_edges (line 446) | def graph_to_edges(graph: dict[str, list[str]]) -> list[tuple[str, str]]:
function assign_random_coordinate_to_node (line 458) | def assign_random_coordinate_to_node(
function assign_isometric_coordinate_to_node (line 472) | def assign_isometric_coordinate_to_node(
function curly_node_coordinate (line 488) | def curly_node_coordinate(
FILE: rdagent/components/knowledge_management/vector_base.py
class KnowledgeMetaData (line 13) | class KnowledgeMetaData:
method __init__ (line 14) | def __init__(self, content: str = "", label: str = None, embedding=Non...
method split_into_trunk (line 22) | def split_into_trunk(self, size: int = 1000, overlap: int = 0):
method create_embedding (line 40) | def create_embedding(self):
method from_dict (line 50) | def from_dict(self, data: dict):
method __repr__ (line 55) | def __repr__(self):
function contents_to_documents (line 62) | def contents_to_documents(contents: List[str], label: str = None) -> Lis...
class VectorBase (line 72) | class VectorBase(KnowledgeBase):
method add (line 77) | def add(self, document: Union[Document, List[Document]]):
method search (line 90) | def search(self, content: str, topk_k: int | None = None, similarity_t...
class PDVectorBase (line 106) | class PDVectorBase(VectorBase):
method __init__ (line 111) | def __init__(self, path: Union[str, Path] = None):
method shape (line 115) | def shape(self):
method add (line 118) | def add(self, document: Union[Document, List[Document]]):
method search (line 158) | def search(
FILE: rdagent/components/loader/experiment_loader.py
class FactorExperimentLoader (line 5) | class FactorExperimentLoader(Loader[FactorExperiment]):
class ModelExperimentLoader (line 9) | class ModelExperimentLoader(Loader[FactorExperiment]):
FILE: rdagent/components/loader/task_loader.py
class FactorTaskLoader (line 10) | class FactorTaskLoader(Loader[FactorTask]):
class ModelTaskLoader (line 14) | class ModelTaskLoader(Loader[ModelTask]):
class ModelTaskLoaderJson (line 18) | class ModelTaskLoaderJson(ModelTaskLoader):
method __init__ (line 44) | def __init__(self, json_uri: str) -> None:
method load (line 48) | def load(self, *argT, **kwargs) -> Sequence[ModelTask]:
class ModelWsLoader (line 85) | class ModelWsLoader(WsLoader[ModelTask, ModelFBWorkspace]):
method __init__ (line 86) | def __init__(self, path: Path) -> None:
method load (line 89) | def load(self, task: ModelTask) -> ModelFBWorkspace:
FILE: rdagent/components/proposal/__init__.py
class LLMHypothesisGen (line 18) | class LLMHypothesisGen(HypothesisGen):
method __init__ (line 19) | def __init__(self, scen: Scenario):
method prepare_context (line 24) | def prepare_context(self, trace: Trace) -> Tuple[dict, bool]: ...
method convert_response (line 27) | def convert_response(self, response: str) -> Hypothesis: ...
method gen (line 29) | def gen(
class FactorHypothesisGen (line 68) | class FactorHypothesisGen(LLMHypothesisGen):
method __init__ (line 69) | def __init__(self, scen: Scenario):
class ModelHypothesisGen (line 74) | class ModelHypothesisGen(LLMHypothesisGen):
method __init__ (line 75) | def __init__(self, scen: Scenario):
class FactorAndModelHypothesisGen (line 80) | class FactorAndModelHypothesisGen(LLMHypothesisGen):
method __init__ (line 81) | def __init__(self, scen: Scenario):
class LLMHypothesis2Experiment (line 86) | class LLMHypothesis2Experiment(Hypothesis2Experiment[Experiment]):
method prepare_context (line 88) | def prepare_context(self, hypothesis: Hypothesis, trace: Trace) -> Tup...
method convert_response (line 91) | def convert_response(self, response: str, hypothesis: Hypothesis, trac...
method convert (line 94) | def convert(self, hypothesis: Hypothesis, trace: Trace) -> Experiment:
class FactorHypothesis2Experiment (line 124) | class FactorHypothesis2Experiment(LLMHypothesis2Experiment):
method __init__ (line 125) | def __init__(self):
class ModelHypothesis2Experiment (line 130) | class ModelHypothesis2Experiment(LLMHypothesis2Experiment):
method __init__ (line 131) | def __init__(self):
class FactorAndModelHypothesis2Experiment (line 136) | class FactorAndModelHypothesis2Experiment(LLMHypothesis2Experiment):
method __init__ (line 137) | def __init__(self):
FILE: rdagent/components/runner/__init__.py
class CachedRunner (line 6) | class CachedRunner(Developer[ASpecificExp]):
method get_cache_key (line 7) | def get_cache_key(self, exp: Experiment) -> str:
method assign_cached_result (line 16) | def assign_cached_result(self, exp: Experiment, cached_res: Experiment...
FILE: rdagent/components/workflow/conf.py
class BasePropSetting (line 4) | class BasePropSetting(ExtendedBaseSettings):
FILE: rdagent/components/workflow/rd_loop.py
class RDLoop (line 31) | class RDLoop(LoopBase, metaclass=LoopMeta):
method __init__ (line 33) | def __init__(self, PROP_SETTING: BasePropSetting):
method _set_interactor (line 71) | def _set_interactor(self, user_request_q: Queue, user_response_q: Queue):
method _init_base_features (line 75) | def _init_base_features(self, base_features_path: str | None):
method _interact_init_params (line 112) | def _interact_init_params(self) -> None:
method _interact_hypo (line 154) | def _interact_hypo(self, hypo: Hypothesis) -> Hypothesis:
method _interact_feedback (line 169) | def _interact_feedback(self, feedback: HypothesisFeedback) -> Hypothes...
method _propose (line 184) | def _propose(self):
method _exp_gen (line 193) | def _exp_gen(self, hypothesis: Hypothesis):
method direct_exp_gen (line 199) | async def direct_exp_gen(self, prev_out: dict[str, Any]):
method coding (line 212) | def coding(self, prev_out: dict[str, Any]):
method running (line 217) | def running(self, prev_out: dict[str, Any]):
method feedback (line 222) | def feedback(self, prev_out: dict[str, Any]):
method record (line 238) | def record(self, prev_out: dict[str, Any]):
FILE: rdagent/core/conf.py
class ExtendedBaseSettings (line 13) | class ExtendedBaseSettings(BaseSettings):
method settings_customise_sources (line 16) | def settings_customise_sources(
class RDAgentSettings (line 46) | class RDAgentSettings(ExtendedBaseSettings):
method get_max_parallel (line 92) | def get_max_parallel(self) -> int:
method is_force_subproc (line 102) | def is_force_subproc(self) -> bool:
FILE: rdagent/core/developer.py
class Developer (line 12) | class Developer(ABC, Generic[ASpecificExp]):
method __init__ (line 13) | def __init__(self, scen: Scenario) -> None:
method develop (line 17) | def develop(self, exp: ASpecificExp) -> ASpecificExp: # TODO: remove ...
FILE: rdagent/core/evaluation.py
class Feedback (line 8) | class Feedback:
method is_acceptable (line 15) | def is_acceptable(self) -> bool:
method finished (line 22) | def finished(self) -> bool:
method __bool__ (line 29) | def __bool__(self) -> bool:
class EvaluableObj (line 33) | class EvaluableObj:
class Evaluator (line 42) | class Evaluator(ABC):
method evaluate (line 53) | def evaluate(
FILE: rdagent/core/evolving_agent.py
class EvoAgent (line 26) | class EvoAgent(ABC, Generic[ASpecificEvaluator, ASpecificEvolvableSubjec...
method __init__ (line 28) | def __init__(self, max_loop: int, evolving_strategy: EvolvingStrategy)...
method multistep_evolve (line 33) | def multistep_evolve(
class RAGEvaluator (line 43) | class RAGEvaluator(IterEvaluator):
method evaluate_iter (line 46) | def evaluate_iter(
class RAGEvoAgent (line 78) | class RAGEvoAgent(EvoAgent[RAGEvaluator, ASpecificEvolvableSubjects], Ge...
method __init__ (line 80) | def __init__(
method _get_overall_feedback (line 120) | def _get_overall_feedback(
method multistep_evolve (line 140) | def multistep_evolve(
FILE: rdagent/core/evolving_framework.py
class Knowledge (line 17) | class Knowledge:
class QueriedKnowledge (line 21) | class QueriedKnowledge:
class EvolvingKnowledgeBase (line 25) | class EvolvingKnowledgeBase(KnowledgeBase):
method query (line 27) | def query(
class EvolvableSubjects (line 33) | class EvolvableSubjects(EvaluableObj):
method clone (line 36) | def clone(self) -> EvolvableSubjects:
class EvoStep (line 44) | class EvoStep(Generic[ASpecificEvolvableSubjects]):
class EvolvingStrategy (line 61) | class EvolvingStrategy(ABC, Generic[ASpecificEvolvableSubjects]):
method __init__ (line 62) | def __init__(self, scen: Scenario) -> None:
method evolve_iter (line 66) | def evolve_iter(
class IterEvaluator (line 94) | class IterEvaluator(Evaluator):
method evaluate (line 101) | def evaluate(self, eo: EvaluableObj) -> Feedback:
method evaluate_iter (line 115) | def evaluate_iter(self) -> Generator[Feedback, EvaluableObj | None, Fe...
class RAGStrategy (line 141) | class RAGStrategy(ABC, Generic[ASpecificEvolvableSubjects]):
method __init__ (line 144) | def __init__(self, *args: Any, **kwargs: Any) -> None:
method load_or_init_knowledge_base (line 148) | def load_or_init_knowledge_base(
method query (line 156) | def query(
method generate_knowledge (line 165) | def generate_knowledge(
method dump_knowledge_base (line 179) | def dump_knowledge_base(self, *args: Any, **kwargs: Any) -> None:
method load_dumped_knowledge_base (line 183) | def load_dumped_knowledge_base(self, *args: Any, **kwargs: Any) -> None:
FILE: rdagent/core/exception.py
class WorkflowError (line 1) | class WorkflowError(Exception):
class FormatError (line 7) | class FormatError(WorkflowError):
class CodeBlockParseError (line 13) | class CodeBlockParseError(FormatError):
method __init__ (line 16) | def __init__(self, message: str, content: str, language: str) -> None:
class CoderError (line 23) | class CoderError(WorkflowError):
class CodeFormatError (line 36) | class CodeFormatError(CoderError):
class CustomRuntimeError (line 42) | class CustomRuntimeError(CoderError):
class NoOutputError (line 48) | class NoOutputError(CoderError):
class RunnerError (line 54) | class RunnerError(Exception):
class KaggleError (line 67) | class KaggleError(Exception):
class PolicyError (line 73) | class PolicyError(Exception):
class EvaluatorDidNotTerminateError (line 79) | class EvaluatorDidNotTerminateError(RuntimeError):
FILE: rdagent/core/experiment.py
class AbsTask (line 34) | class AbsTask(ABC):
method __init__ (line 35) | def __init__(self, name: str, version: int = 1) -> None:
method get_task_information (line 45) | def get_task_information(self) -> str:
class UserInstructions (line 51) | class UserInstructions(list[str]):
method __str__ (line 52) | def __str__(self) -> str:
class Task (line 58) | class Task(AbsTask):
method __init__ (line 59) | def __init__(
method get_task_information (line 70) | def get_task_information(self) -> str:
method __repr__ (line 73) | def __repr__(self) -> str:
class RunningInfo (line 82) | class RunningInfo:
class Workspace (line 87) | class Workspace(ABC, Generic[ASpecificTask, ASpecificFeedback]):
method __init__ (line 93) | def __init__(self, target_task: ASpecificTask | None = None) -> None:
method execute (line 99) | def execute(self, *args: Any, **kwargs: Any) -> object | None:
method copy (line 104) | def copy(self) -> Workspace:
method all_codes (line 110) | def all_codes(self) -> str:
method create_ws_ckp (line 117) | def create_ws_ckp(self) -> None:
method recover_ws_ckp (line 123) | def recover_ws_ckp(self) -> None:
class WsLoader (line 132) | class WsLoader(ABC, Generic[ASpecificTask, ASpecificWS]):
method load (line 134) | def load(self, task: ASpecificTask) -> ASpecificWS:
class FBWorkspace (line 139) | class FBWorkspace(Workspace):
method __init__ (line 162) | def __init__(self, *args: Any, **kwargs: Any) -> None:
method _format_code_dict (line 172) | def _format_code_dict(code_dict: dict[str, str]) -> str:
method all_codes (line 182) | def all_codes(self) -> str:
method get_codes (line 189) | def get_codes(self, pattern: str) -> str:
method prepare (line 198) | def prepare(self) -> None:
method link_all_files_in_folder_to_workspace (line 209) | def link_all_files_in_folder_to_workspace(data_path: Path, workspace_p...
method inject_files (line 223) | def inject_files(self, **files: str) -> None:
method remove_files (line 245) | def remove_files(self, file_names: str | list[str]) -> None:
method get_files (line 257) | def get_files(self) -> list[Path]:
method inject_code_from_folder (line 266) | def inject_code_from_folder(self, folder_path: Path) -> None:
method inject_code_from_file_dict (line 275) | def inject_code_from_file_dict(self, workspace: FBWorkspace) -> None:
method inject_from_workspace (line 283) | def inject_from_workspace(self, workspace: FBWorkspace) -> None:
method copy (line 287) | def copy(self) -> FBWorkspace:
method clear (line 293) | def clear(self) -> None:
method before_execute (line 300) | def before_execute(self) -> None:
method execute (line 307) | def execute(self, env: Env, entry: str) -> str:
method run (line 314) | def run(self, env: Env, entry: str) -> EnvResult:
method create_ws_ckp (line 324) | def create_ws_ckp(self) -> None:
method recover_ws_ckp (line 350) | def recover_ws_ckp(self) -> None:
method __str__ (line 380) | def __str__(self) -> str:
class ExperimentPlan (line 390) | class ExperimentPlan(dict[str, Any]):
class Experiment (line 396) | class Experiment(
method __init__ (line 404) | def __init__(
method set_user_instructions (line 446) | def set_user_instructions(self, user_instructions: UserInstructions | ...
method result (line 461) | def result(self) -> object:
method result (line 465) | def result(self, value: object) -> None:
method create_ws_ckp (line 469) | def create_ws_ckp(self) -> None:
method recover_ws_ckp (line 476) | def recover_ws_ckp(self) -> None:
class Loader (line 495) | class Loader(ABC, Generic[TaskOrExperiment]):
method load (line 497) | def load(self, *args: Any, **kwargs: Any) -> TaskOrExperiment:
FILE: rdagent/core/interactor.py
class Interactor (line 13) | class Interactor(ABC, Generic[ASpecificExp]):
method __init__ (line 14) | def __init__(self, scen: Scenario) -> None:
method interact (line 18) | def interact(self, exp: ASpecificExp, trace: Trace | None = None) -> A...
FILE: rdagent/core/knowledge_base.py
class KnowledgeBase (line 8) | class KnowledgeBase:
method __init__ (line 9) | def __init__(self, path: str | Path | None = None) -> None:
method load (line 13) | def load(self) -> None:
method dump (line 22) | def dump(self) -> None:
FILE: rdagent/core/prompts.py
class Prompts (line 8) | class Prompts(SingletonBaseClass, dict[str, str]):
method __init__ (line 9) | def __init__(self, file_path: Path) -> None:
FILE: rdagent/core/proposal.py
class Hypothesis (line 24) | class Hypothesis:
method __init__ (line 32) | def __init__(
method __str__ (line 48) | def __str__(self) -> str:
class ExperimentFeedback (line 58) | class ExperimentFeedback(Feedback):
method __init__ (line 59) | def __init__(
method __bool__ (line 78) | def __bool__(self) -> bool:
method __str__ (line 81) | def __str__(self) -> str:
method from_exception (line 89) | def from_exception(cls, e: Exception) -> ExperimentFeedback:
class HypothesisFeedback (line 96) | class HypothesisFeedback(ExperimentFeedback):
method __init__ (line 97) | def __init__(
method __str__ (line 122) | def __str__(self) -> str:
class Trace (line 141) | class Trace(Generic[ASpecificScen, ASpecificKB]):
method __init__ (line 146) | def __init__(self, scen: ASpecificScen, knowledge_base: ASpecificKB | ...
method get_sota_hypothesis_and_experiment (line 178) | def get_sota_hypothesis_and_experiment(self) -> tuple[Hypothesis | Non...
method is_selection_new_tree (line 187) | def is_selection_new_tree(self, selection: tuple[int, ...] | None = No...
method get_current_selection (line 197) | def get_current_selection(self) -> tuple[int, ...]:
method set_current_selection (line 200) | def set_current_selection(self, selection: tuple[int, ...]) -> None:
method get_parent_exps (line 203) | def get_parent_exps(
method exp2idx (line 219) | def exp2idx(self, exp: Experiment | list[Experiment]) -> int | list[in...
method idx2exp (line 231) | def idx2exp(self, idx: int | list[int]) -> Experiment | list[Experiment]:
method is_parent (line 237) | def is_parent(self, parent_idx: int, child_idx: int) -> bool:
method get_parents (line 241) | def get_parents(self, child_idx: int) -> list[int]:
method sync_dag_parent_and_hist (line 256) | def sync_dag_parent_and_hist(
method get_children (line 286) | def get_children(self, parent_idx: int | None = None) -> list[NodeType]:
method get_sota_experiment (line 298) | def get_sota_experiment(self, node_id: int | None = None) -> Experimen...
class CheckpointSelector (line 321) | class CheckpointSelector:
method get_selection (line 327) | def get_selection(self, trace: Trace) -> tuple[int, ...] | None:
class SOTAexpSelector (line 343) | class SOTAexpSelector:
method get_sota_exp_to_submit (line 349) | def get_sota_exp_to_submit(self, trace: Trace) -> Experiment | None:
class ExpPlanner (line 355) | class ExpPlanner(ABC, Generic[ASpecificPlan]):
method __init__ (line 361) | def __init__(self, scen: Scenario) -> None:
method plan (line 365) | def plan(self, trace: Trace) -> ASpecificPlan:
class ExpGen (line 372) | class ExpGen(ABC):
method __init__ (line 374) | def __init__(self, scen: Scenario) -> None:
method gen (line 378) | def gen(self, trace: Trace) -> Experiment:
method async_gen (line 394) | async def async_gen(self, trace: Trace, loop: LoopBase) -> Experiment:
method reset (line 405) | def reset(self) -> None:
class HypothesisGen (line 414) | class HypothesisGen(ABC):
method __init__ (line 416) | def __init__(self, scen: Scenario) -> None:
method gen (line 420) | def gen(
class Hypothesis2Experiment (line 437) | class Hypothesis2Experiment(ABC, Generic[ASpecificExp]):
method convert (line 443) | def convert(self, hypothesis: Hypothesis, trace: Trace) -> ASpecificExp:
class Experiment2Feedback (line 451) | class Experiment2Feedback(ABC):
method __init__ (line 455) | def __init__(self, scen: Scenario) -> None:
method generate_feedback (line 459) | def generate_feedback(
FILE: rdagent/core/scenario.py
class Scenario (line 6) | class Scenario(ABC):
method background (line 14) | def background(self) -> str:
method get_source_data_desc (line 18) | def get_source_data_desc(self, task: Task | None = None) -> str: # no...
method source_data (line 27) | def source_data(self) -> str:
method rich_style_description (line 39) | def rich_style_description(self) -> str:
method get_scenario_all_desc (line 43) | def get_scenario_all_desc(
method get_runtime_environment (line 56) | def get_runtime_environment(self) -> str:
method experiment_setting (line 62) | def experiment_setting(self) -> str | None:
FILE: rdagent/core/utils.py
class RDAgentException (line 20) | class RDAgentException(Exception): # noqa: N818
class SingletonBaseClass (line 24) | class SingletonBaseClass:
method __new__ (line 32) | def __new__(cls, *args: Any, **kwargs: Any) -> Any:
method __reduce__ (line 47) | def __reduce__(self) -> NoReturn:
function parse_json (line 58) | def parse_json(response: str) -> Any:
function similarity (line 67) | def similarity(text1: str, text2: str) -> int:
function import_class (line 75) | def import_class(class_path: str) -> Any:
class CacheSeedGen (line 91) | class CacheSeedGen:
method __init__ (line 101) | def __init__(self) -> None:
method set_seed (line 104) | def set_seed(self, seed: int) -> None:
method get_next_seed (line 107) | def get_next_seed(self) -> int:
function _subprocess_wrapper (line 115) | def _subprocess_wrapper(f: Callable, seed: int, args: list) -> Any:
function multiprocessing_wrapper (line 124) | def multiprocessing_wrapper(func_calls: list[tuple[Callable, tuple]], n:...
function cache_with_pickle (line 156) | def cache_with_pickle(hash_func: Callable, post_process_func: Callable |...
FILE: rdagent/log/base.py
class Message (line 12) | class Message:
class Storage (line 25) | class Storage:
method log (line 43) | def log(
method iter_msg (line 67) | def iter_msg(self) -> Generator[Message, None, None]:
method truncate (line 74) | def truncate(self, time: datetime) -> None:
method __str__ (line 80) | def __str__(self) -> str:
class View (line 84) | class View:
method display (line 93) | def display(self, s: Storage, watch: bool = False) -> None:
FILE: rdagent/log/conf.py
class LogSettings (line 10) | class LogSettings(ExtendedBaseSettings):
method set_ui_server_port (line 22) | def set_ui_server_port(self, port: int | None) -> None:
method model_post_init (line 30) | def model_post_init(self, _context: Any, /) -> None:
FILE: rdagent/log/logger.py
class RDAgentLog (line 20) | class RDAgentLog(SingletonBaseClass):
method _configure_console_sinks (line 48) | def _configure_console_sinks(cls) -> None:
method _tag (line 59) | def _tag(self) -> str: # Get current tag
method _tag (line 63) | def _tag(self, value: str) -> None:
method __init__ (line 66) | def __init__(self) -> None:
method refresh_storages_from_settings (line 76) | def refresh_storages_from_settings(self) -> None:
method rebind_console_to_current_streams (line 82) | def rebind_console_to_current_streams(self) -> None:
method tag (line 92) | def tag(self, tag: str) -> Generator[None, None, None]:
method set_storages_path (line 106) | def set_storages_path(self, path: str | Path) -> None:
method truncate_storages (line 113) | def truncate_storages(self, time: datetime) -> None:
method get_pids (line 117) | def get_pids(self) -> str:
method log_object (line 132) | def log_object(self, obj: object, *, tag: str = "") -> None:
method _log (line 138) | def _log(self, level: str, msg: str, *, tag: str = "", raw: bool = Fal...
method info (line 146) | def info(self, msg: str, *, tag: str = "", raw: bool = False) -> None:
method warning (line 149) | def warning(self, msg: str, *, tag: str = "", raw: bool = False) -> None:
method error (line 152) | def error(self, msg: str, *, tag: str = "", raw: bool = False) -> None:
FILE: rdagent/log/mle_summary.py
function save_grade_info (line 25) | def save_grade_info(log_trace_path: Path):
function save_all_grade_info (line 45) | def save_all_grade_info(log_folder: str | Path) -> None:
function _get_loop_and_fn_after_hours (line 54) | def _get_loop_and_fn_after_hours(log_folder: Path, hours: int):
function summarize_folder (line 74) | def summarize_folder(log_folder: Path, hours: int | None = None) -> None:
function grade_summary (line 244) | def grade_summary(log_folder: str) -> None:
FILE: rdagent/log/server/app.py
class _YellowWarningFormatter (line 30) | class _YellowWarningFormatter(logging.Formatter):
method format (line 31) | def format(self, record: logging.LogRecord) -> str:
function _configure_app_logger (line 37) | def _configure_app_logger() -> None:
class RDAgentTask (line 52) | class RDAgentTask:
method __init__ (line 53) | def __init__(
method start (line 88) | def start(self) -> None:
method is_alive (line 92) | def is_alive(self) -> bool:
method get_end_code (line 95) | def get_end_code(self) -> int:
method stop (line 100) | def stop(self) -> None:
method _run (line 116) | def _run(self) -> None:
function _drain_user_requests_into_messages (line 175) | def _drain_user_requests_into_messages(task: RDAgentTask) -> None:
function favicon (line 202) | def favicon():
function _normalize_static_request_path (line 206) | def _normalize_static_request_path(fn: str) -> str:
function _get_or_create_task (line 213) | def _get_or_create_task(trace_id: str) -> RDAgentTask:
function _resolve_stdout_path (line 230) | def _resolve_stdout_path(trace_id: str) -> Path | None:
function read_trace (line 250) | def read_trace(log_path: Path, id: str = "") -> None:
function update_trace (line 284) | def update_trace():
function download_stdout_file (line 333) | def download_stdout_file():
function upload_file (line 351) | def upload_file():
function receive_msgs (line 454) | def receive_msgs():
function submit_user_interaction_response (line 474) | def submit_user_interaction_response():
function control_process (line 497) | def control_process():
function test (line 537) | def test():
function index (line 545) | def index():
function server_static_files (line 552) | def server_static_files(fn):
function main (line 556) | def main(port: int = 19899):
FILE: rdagent/log/server/debug_app.py
function favicon (line 27) | def favicon():
function update_trace (line 36) | def update_trace():
function upload_file (line 74) | def upload_file():
function receive_msgs (line 117) | def receive_msgs():
function control_process (line 136) | def control_process():
function test (line 150) | def test():
function index (line 156) | def index():
function server_static_files (line 163) | def server_static_files(fn):
function main (line 167) | def main(port: int = 19899):
FILE: rdagent/log/storage.py
function _remove_empty_dir (line 14) | def _remove_empty_dir(path: Path) -> None:
class FileStorage (line 28) | class FileStorage(Storage):
method __init__ (line 35) | def __init__(self, path: str | Path) -> None:
method log (line 38) | def log(
method iter_msg (line 79) | def iter_msg(self, tag: str | None = None, pattern: str | None = None)...
method truncate (line 107) | def truncate(self, time: datetime) -> None:
method __str__ (line 115) | def __str__(self) -> str:
FILE: rdagent/log/timer.py
class RDAgentTimer (line 8) | class RDAgentTimer:
method __init__ (line 9) | def __init__(self) -> None:
method reset (line 15) | def reset(self, all_duration: str | timedelta) -> None:
method restart_by_remain_time (line 41) | def restart_by_remain_time(self) -> None:
method add_duration (line 50) | def add_duration(self, duration: timedelta) -> None:
method is_timeout (line 56) | def is_timeout(self) -> bool:
method update_remain_time (line 63) | def update_remain_time(self) -> None:
method remain_time (line 68) | def remain_time(self) -> timedelta | None:
class RDAgentTimerWrapper (line 75) | class RDAgentTimerWrapper(SingletonBaseClass):
method __init__ (line 76) | def __init__(self) -> None:
method replace_timer (line 81) | def replace_timer(self, timer: RDAgentTimer) -> None:
FILE: rdagent/log/ui/app.py
function filter_log_folders (line 72) | def filter_log_folders(main_log_path):
function should_display (line 131) | def should_display(msg: Message):
function get_msgs_until (line 142) | def get_msgs_until(end_func: Callable[[Message], bool] = lambda _: True):
function refresh (line 258) | def refresh(same_trace: bool = False):
function evolving_feedback_window (line 292) | def evolving_feedback_window(wsf: FactorSingleFeedback | ModelSingleFeed...
function display_hypotheses (line 327) | def display_hypotheses(hypotheses: dict[int, Hypothesis], decisions: dic...
function metrics_window (line 370) | def metrics_window(df: pd.DataFrame, R: int, C: int, *, height: int = 30...
function summary_window (line 424) | def summary_window():
function tabs_hint (line 501) | def tabs_hint():
function tasks_window (line 508) | def tasks_window(tasks: list[FactorTask | ModelTask]):
function research_window (line 550) | def research_window():
function feedback_window (line 586) | def feedback_window():
function evolving_window (line 670) | def evolving_window():
function analyze_task_completion (line 866) | def analyze_task_completion():
FILE: rdagent/log/ui/conf.py
class UIBasePropSetting (line 6) | class UIBasePropSetting(ExtendedBaseSettings):
FILE: rdagent/log/ui/ds_summary.py
function curves_win (line 27) | def curves_win(summary: dict):
function all_summarize_win (line 71) | def all_summarize_win():
FILE: rdagent/log/ui/ds_trace.py
function convert_defaultdict_to_dict (line 68) | def convert_defaultdict_to_dict(d):
function load_data (line 74) | def load_data(log_path: Path):
function load_stdout (line 176) | def load_stdout(stdout_path: Path):
function task_win (line 185) | def task_win(task):
function workspace_win (line 199) | def workspace_win(workspace, cmp_workspace=None, cmp_name="last code."):
function show_text (line 247) | def show_text(text, lang=None):
function highlight_prompts_uri (line 257) | def highlight_prompts_uri(uri):
function llm_log_win (line 265) | def llm_log_win(llm_d: list):
function hypothesis_win (line 387) | def hypothesis_win(hypo):
function exp_gen_win (line 394) | def exp_gen_win(exp_gen_data, llm_data=None):
function evolving_win (line 408) | def evolving_win(data, key, llm_data=None, base_workspace=None):
function coding_win (line 452) | def coding_win(data, base_exp, llm_data: dict | None = None):
function running_win (line 488) | def running_win(data, base_exp, llm_data=None, last_sota_exp=None):
function feedback_win (line 524) | def feedback_win(fb_data, llm_data=None):
function sota_win (line 540) | def sota_win(sota_exp, trace):
function main_win (line 557) | def main_win(loop_id, llm_data=None):
function replace_ep_path (line 609) | def replace_ep_path(p: Path):
function get_llm_call_stats (line 621) | def get_llm_call_stats(llm_data: dict) -> tuple[int, int]:
function get_timeout_stats (line 643) | def get_timeout_stats(llm_data: dict):
function timedelta_to_str (line 667) | def timedelta_to_str(td: timedelta | None) -> str:
function summarize_win (line 677) | def summarize_win():
function stdout_win (line 1077) | def stdout_win(loop_id: int):
function get_folders_sorted (line 1101) | def get_folders_sorted(log_path, sort_by_time=False):
function get_state_data_range (line 1180) | def get_state_data_range(state_data):
FILE: rdagent/log/ui/ds_user_interact.py
function render_main_content (line 21) | def render_main_content():
function update_sessions (line 133) | def update_sessions():
function render_sidebar (line 151) | def render_sidebar():
FILE: rdagent/log/ui/dsapp.py
function convert_log_folder_str (line 10) | def convert_log_folder_str(lf: str) -> str:
function extract_amlt_name (line 16) | def extract_amlt_name(x: str) -> str:
FILE: rdagent/log/ui/llm_st.py
function get_folders_sorted (line 22) | def get_folders_sorted(log_path):
function load_data (line 52) | def load_data():
function show_text (line 83) | def show_text(text, lang=None):
function highlight_prompts_uri (line 93) | def highlight_prompts_uri(uri):
FILE: rdagent/log/ui/qlib_report_figure.py
class BaseGraph (line 9) | class BaseGraph:
method __init__ (line 12) | def __init__(
method _init_data (line 38) | def _init_data(self):
method _init_parameters (line 48) | def _init_parameters(self, **kwargs):
method get_instance_with_graph_parameters (line 62) | def get_instance_with_graph_parameters(graph_type: str = None, **kwargs):
method _get_layout (line 77) | def _get_layout(self) -> go.Layout:
method _get_data (line 84) | def _get_data(self) -> list:
method figure (line 99) | def figure(self) -> go.Figure:
class SubplotsGraph (line 110) | class SubplotsGraph:
method __init__ (line 116) | def __init__(
method _init_sub_graph_data (line 203) | def _init_sub_graph_data(self):
method _init_subplots_kwargs (line 229) | def _init_subplots_kwargs(self):
method _init_figure (line 246) | def _init_figure(self):
method figure (line 286) | def figure(self):
function _calculate_maximum (line 290) | def _calculate_maximum(df: pd.DataFrame, is_ex: bool = False):
function _calculate_mdd (line 306) | def _calculate_mdd(series):
function _calculate_report_data (line 316) | def _calculate_report_data(raw_df: pd.DataFrame) -> pd.DataFrame:
function report_figure (line 348) | def report_figure(df: pd.DataFrame) -> list | tuple:
FILE: rdagent/log/ui/st_fixed_container.py
function st_fixed_container (line 73) | def st_fixed_container(
FILE: rdagent/log/ui/storage.py
class WebStorage (line 13) | class WebStorage(Storage):
method __init__ (line 19) | def __init__(self, port: int, path: str) -> None:
method __str__ (line 30) | def __str__(self):
method log (line 33) | def log(self, obj: object, tag: str, timestamp: datetime | None = None...
method truncate (line 54) | def truncate(self, time: datetime) -> None:
method iter_msg (line 57) | def iter_msg(self, **kwargs: Any) -> Generator[Message, None, None]:
method _obj_to_json (line 66) | def _obj_to_json(
FILE: rdagent/log/ui/utils.py
function get_script_time (line 117) | def get_script_time(stdout_p: Path):
function _log_path_hash_func (line 134) | def _log_path_hash_func(log_path: Path) -> str:
function map_stat (line 146) | def map_stat(sota_mle_score: dict | None) -> str:
function get_best_report (line 164) | def get_best_report(log_path: Path) -> dict | None:
function _get_sota_exp_stat_hash_func (line 182) | def _get_sota_exp_stat_hash_func(log_path: Path, selector: Literal["auto...
function get_sota_exp_stat (line 186) | def get_sota_exp_stat(
function _get_score_stat_hash_func (line 262) | def _get_score_stat_hash_func(log_path: Path, sota_loop_id: int) -> str:
function get_score_stat (line 266) | def get_score_stat(log_path: Path, sota_loop_id: int) -> tuple[float | N...
function load_times_deprecated (line 365) | def load_times_deprecated(log_path: Path):
function load_times_info (line 382) | def load_times_info(log_path: Path) -> dict[int, dict[str, dict[Literal[...
function _log_folders_summary_hash_func (line 423) | def _log_folders_summary_hash_func(log_folder: str | Path, hours: int | ...
function get_summary_df (line 432) | def get_summary_df(log_folder: str | Path, hours: int | None = None) -> ...
function percent_df (line 668) | def percent_df(summary_df: pd.DataFrame, show_origin=True) -> pd.DataFrame:
function get_statistics_df (line 712) | def get_statistics_df(summary_df: pd.DataFrame) -> pd.DataFrame:
function curve_figure (line 783) | def curve_figure(scores: pd.DataFrame) -> go.Figure:
function lite_curve_figure (line 818) | def lite_curve_figure(summary):
function trace_figure (line 884) | def trace_figure(trace: Trace, merge_loops: list = []):
function timeline_figure (line 975) | def timeline_figure(times_dict: dict[int, dict[str, dict[Literal["start_...
function compare (line 1089) | def compare(
FILE: rdagent/log/ui/web.py
class WebView (line 29) | class WebView(View):
method __init__ (line 30) | def __init__(self, ui: "StWindow"):
method display (line 35) | def display(self, s: Storage, watch: bool = False):
class StWindow (line 42) | class StWindow:
method __init__ (line 43) | def __init__(self, container: "DeltaGenerator"):
method consume_msg (line 46) | def consume_msg(self, msg: Message):
class LLMWindow (line 51) | class LLMWindow(StWindow):
method __init__ (line 52) | def __init__(self, container: "DeltaGenerator", session_name: str = "c...
method consume_msg (line 56) | def consume_msg(self, msg: Message):
class ProgressTabsWindow (line 60) | class ProgressTabsWindow(StWindow):
method __init__ (line 65) | def __init__(
method consume_msg (line 78) | def consume_msg(self, msg: Message):
class ObjectsTabsWindow (line 102) | class ObjectsTabsWindow(StWindow):
method __init__ (line 103) | def __init__(
method consume_msg (line 115) | def consume_msg(self, msg: Message):
class RoundTabsWindow (line 145) | class RoundTabsWindow(StWindow):
method __init__ (line 146) | def __init__(
method consume_msg (line 161) | def consume_msg(self, msg: Message):
class HypothesisWindow (line 169) | class HypothesisWindow(StWindow):
method consume_msg (line 170) | def consume_msg(self, msg: Message | Hypothesis):
class HypothesisFeedbackWindow (line 179) | class HypothesisFeedbackWindow(StWindow):
method consume_msg (line 180) | def consume_msg(self, msg: Message | HypothesisFeedback):
class FactorTaskWindow (line 192) | class FactorTaskWindow(StWindow):
method consume_msg (line 193) | def consume_msg(self, msg: Message | FactorTask):
class ModelTaskWindow (line 206) | class ModelTaskWindow(StWindow):
method consume_msg (line 207) | def consume_msg(self, msg: Message | ModelTask):
class FactorFeedbackWindow (line 220) | class FactorFeedbackWindow(StWindow):
method consume_msg (line 221) | def consume_msg(self, msg: Message | FactorSingleFeedback):
class ModelFeedbackWindow (line 237) | class ModelFeedbackWindow(StWindow):
method consume_msg (line 238) | def consume_msg(self, msg: Message | ModelSingleFeedback):
class WorkspaceWindow (line 256) | class WorkspaceWindow(StWindow):
method __init__ (line 257) | def __init__(self, container: "DeltaGenerator", show_task_info: bool =...
method consume_msg (line 261) | def consume_msg(self, msg: Message | FactorFBWorkspace | ModelFBWorksp...
class QlibFactorExpWindow (line 285) | class QlibFactorExpWindow(StWindow):
method __init__ (line 286) | def __init__(self, container: DeltaGenerator, show_task_info: bool = F...
method consume_msg (line 290) | def consume_msg(self, msg: Message | QlibFactorExperiment):
class QlibModelExpWindow (line 318) | class QlibModelExpWindow(StWindow):
method __init__ (line 319) | def __init__(self, container: DeltaGenerator, show_task_info: bool = F...
method consume_msg (line 323) | def consume_msg(self, msg: Message | QlibModelExperiment):
class SimpleTraceWindow (line 345) | class SimpleTraceWindow(StWindow):
method __init__ (line 346) | def __init__(
method consume_msg (line 358) | def consume_msg(self, msg: Message):
function mock_msg (line 433) | def mock_msg(obj) -> Message:
class TraceObjWindow (line 437) | class TraceObjWindow(StWindow):
method __init__ (line 438) | def __init__(self, container: "DeltaGenerator" = st.container()):
method consume_msg (line 441) | def consume_msg(self, msg: Message | Trace):
class ResearchWindow (line 457) | class ResearchWindow(StWindow):
method consume_msg (line 458) | def consume_msg(self, msg: Message):
class EvolvingWindow (line 477) | class EvolvingWindow(StWindow):
method __init__ (line 478) | def __init__(self, container: "DeltaGenerator"):
method consume_msg (line 482) | def consume_msg(self, msg: Message):
class DevelopmentWindow (line 519) | class DevelopmentWindow(StWindow):
method __init__ (line 520) | def __init__(self, container: "DeltaGenerator"):
method consume_msg (line 528) | def consume_msg(self, msg: Message):
class FeedbackWindow (line 533) | class FeedbackWindow(StWindow):
method __init__ (line 534) | def __init__(self, container: "DeltaGenerator"):
method consume_msg (line 537) | def consume_msg(self, msg: Message):
class SingleRDLoopWindow (line 550) | class SingleRDLoopWindow(StWindow):
method __init__ (line 551) | def __init__(self, container: "DeltaGenerator"):
method consume_msg (line 558) | def consume_msg(self, msg: Message):
class TraceWindow (line 568) | class TraceWindow(StWindow):
method __init__ (line 569) | def __init__(
method consume_msg (line 598) | def consume_msg(self, msg: Message):
FILE: rdagent/log/utils/__init__.py
class LogColors (line 9) | class LogColors:
method get_all_colors (line 30) | def get_all_colors(cls: type["LogColors"]) -> list:
method render (line 35) | def render(self, text: str, color: str = "", style: str = "") -> str:
method remove_ansi_codes (line 55) | def remove_ansi_codes(s: str) -> str:
class CallerInfo (line 63) | class CallerInfo(TypedDict):
function get_caller_info (line 69) | def get_caller_info(level: int = 2) -> CallerInfo:
function is_valid_session (line 83) | def is_valid_session(log_path: Path) -> bool:
function extract_loopid_func_name (line 87) | def extract_loopid_func_name(tag: str) -> tuple[str, str] | tuple[None, ...
function extract_evoid (line 93) | def extract_evoid(tag: str) -> str | None:
function extract_json (line 99) | def extract_json(log_content: str) -> dict | None:
function gen_datetime (line 106) | def gen_datetime(dt: datetime | None = None) -> datetime:
function dict_get_with_warning (line 117) | def dict_get_with_warning(d: dict, key: str, default: Any = None) -> Any:
FILE: rdagent/log/utils/folder.py
function get_first_session_file_after_duration (line 14) | def get_first_session_file_after_duration(log_folder: str | Path, durati...
function first_li_si_after_one_time (line 40) | def first_li_si_after_one_time(log_path: Path, hours: int = 12) -> tuple...
FILE: rdagent/oai/backend/base.py
class JSONParser (line 37) | class JSONParser:
method __init__ (line 40) | def __init__(self, add_json_in_prompt: bool = False) -> None:
method parse (line 49) | def parse(self, content: str) -> str:
method _direct_parse (line 71) | def _direct_parse(self, content: str) -> str:
method _extract_from_code_block (line 81) | def _extract_from_code_block(self, content: str) -> str:
method _fix_python_syntax (line 90) | def _fix_python_syntax(self, content: str) -> str:
method _extract_with_fix_combined (line 95) | def _extract_with_fix_combined(self, content: str) -> str:
method _fix_python_booleans (line 107) | def _fix_python_booleans(json_str: str) -> str:
method _extract_first_json (line 132) | def _extract_first_json(response: str) -> str:
class CodeBlockParser (line 139) | class CodeBlockParser:
method __init__ (line 150) | def __init__(self, language: str = "python", fallback_to_raw: bool = F...
method _get_language_aliases (line 161) | def _get_language_aliases(self, language: str) -> List[str]:
method parse (line 168) | def parse(self, content: str) -> str:
class SQliteLazyCache (line 195) | class SQliteLazyCache(SingletonBaseClass):
method __init__ (line 196) | def __init__(self, cache_location: str) -> None:
method chat_get (line 230) | def chat_get(self, key: str) -> str | None:
method embedding_get (line 236) | def embedding_get(self, key: str) -> list | dict | str | None:
method chat_set (line 242) | def chat_set(self, key: str, value: str) -> None:
method embedding_set (line 251) | def embedding_set(self, content_to_embedding_dict: dict) -> None:
method message_get (line 260) | def message_get(self, conversation_id: str) -> list[dict[str, Any]]:
method message_set (line 265) | def message_set(self, conversation_id: str, message_value: list[dict[s...
class SessionChatHistoryCache (line 274) | class SessionChatHistoryCache(SingletonBaseClass):
method __init__ (line 275) | def __init__(self) -> None:
method message_get (line 279) | def message_get(self, conversation_id: str) -> list[dict[str, Any]]:
method message_set (line 282) | def message_set(self, conversation_id: str, message_value: list[dict[s...
class ChatSession (line 286) | class ChatSession:
method __init__ (line 287) | def __init__(self, api_backend: Any, conversation_id: str | None = Non...
method build_chat_completion_message (line 292) | def build_chat_completion_message(self, user_prompt: str) -> list[dict...
method build_chat_completion_message_and_calculate_token (line 305) | def build_chat_completion_message_and_calculate_token(self, user_promp...
method build_chat_completion (line 309) | def build_chat_completion(self, user_prompt: str, *args, **kwargs) -> ...
method get_conversation_id (line 345) | def get_conversation_id(self) -> str:
method display_history (line 348) | def display_history(self) -> None:
class APIBackend (line 353) | class APIBackend(ABC):
method __init__ (line 360) | def __init__(
method build_chat_session (line 381) | def build_chat_session(
method _build_messages (line 392) | def _build_messages(
method _build_log_messages (line 429) | def _build_log_messages(self, messages: list[dict[str, Any]]) -> str:
method build_messages_and_create_chat_completion (line 440) | def build_messages_and_create_chat_completion( # type: ignore[no-unty...
method create_embedding (line 493) | def create_embedding(self, input_content: str | list[str], *args, **kw...
method build_messages_and_calculate_token (line 505) | def build_messages_and_calculate_token(
method _try_create_chat_completion_or_embedding (line 520) | def _try_create_chat_completion_or_embedding( # type: ignore[no-untyp...
method _add_json_in_prompt (line 615) | def _add_json_in_prompt(self, messages: list[dict[str, Any]]) -> None:
method _create_chat_completion_auto_continue (line 625) | def _create_chat_completion_auto_continue(
method _create_embedding_with_cache (line 733) | def _create_embedding_with_cache(
method supports_response_schema (line 757) | def supports_response_schema(self) -> bool:
method _calculate_token_from_messages (line 764) | def _calculate_token_from_messages(self, messages: list[dict[str, Any]...
method _create_embedding_inner_function (line 771) | def _create_embedding_inner_function(self, input_content_list: list[st...
method _create_chat_completion_inner_function (line 778) | def _create_chat_completion_inner_function( # type: ignore[no-untyped...
method chat_token_limit (line 791) | def chat_token_limit(self) -> int:
FILE: rdagent/oai/backend/deprec.py
class ConvManager (line 64) | class ConvManager:
method __init__ (line 70) | def __init__(
method _rotate_files (line 79) | def _rotate_files(self) -> None:
method append (line 92) | def append(self, conv: tuple[list, str]) -> None:
class DeprecBackend (line 99) | class DeprecBackend(APIBackend):
method __init__ (line 109) | def __init__( # noqa: C901, PLR0912, PLR0915
method _get_encoder (line 238) | def _get_encoder(self) -> tiktoken.Encoding:
method supports_response_schema (line 266) | def supports_response_schema(self) -> bool:
method _create_embedding_inner_function (line 273) | def _create_embedding_inner_function(self, input_content_list: list[st...
method _create_chat_completion_inner_function (line 294) | def _create_chat_completion_inner_function( # type: ignore[no-untyped...
method _calculate_token_from_messages (line 467) | def _calculate_token_from_messages(self, messages: list[dict[str, Any]...
FILE: rdagent/oai/backend/litellm.py
function _reduce_no_init (line 25) | def _reduce_no_init(exc: Exception) -> tuple:
class LiteLLMSettings (line 35) | class LiteLLMSettings(LLMSettings):
class Config (line 37) | class Config:
class LiteLLMAPIBackend (line 48) | class LiteLLMAPIBackend(APIBackend):
method __init__ (line 53) | def __init__(self, *args: Any, **kwargs: Any) -> None:
method _calculate_token_from_messages (line 60) | def _calculate_token_from_messages(self, messages: list[dict[str, Any]...
method _create_embedding_inner_function (line 71) | def _create_embedding_inner_function(self, input_content_list: list[st...
class CompleteKwargs (line 89) | class CompleteKwargs(TypedDict):
method get_complete_kwargs (line 95) | def get_complete_kwargs(self) -> CompleteKwargs:
method _create_chat_completion_inner_function (line 127) | def _create_chat_completion_inner_function( # type: ignore[no-untyped...
method supports_response_schema (line 226) | def supports_response_schema(self) -> bool:
method chat_token_limit (line 233) | def chat_token_limit(self) -> int:
FILE: rdagent/oai/backend/pydantic_ai.py
function get_agent_model (line 27) | def get_agent_model() -> OpenAIChatModel:
FILE: rdagent/oai/llm_conf.py
class LLMSettings (line 11) | class LLMSettings(ExtendedBaseSettings):
FILE: rdagent/oai/llm_utils.py
function calculate_embedding_distance_between_str_list (line 13) | def calculate_embedding_distance_between_str_list(
function get_api_backend (line 35) | def get_api_backend(*args: Any, **kwargs: Any) -> BaseAPIBackend: # TOD...
FILE: rdagent/oai/utils/embedding.py
function get_embedding_max_tokens (line 27) | def get_embedding_max_tokens(model: str) -> int:
function trim_text_for_embedding (line 63) | def trim_text_for_embedding(text: str, model: str, max_tokens: Optional[...
function truncate_content_list (line 118) | def truncate_content_list(content_list: list[str], model: str) -> list[s...
FILE: rdagent/scenarios/data_science/debug/data.py
class DataHandler (line 18) | class DataHandler:
method load (line 21) | def load(self, path) -> pd.DataFrame:
method dump (line 24) | def dump(self, df: pd.DataFrame, path):
class GenericDataHandler (line 28) | class GenericDataHandler(DataHandler):
method load (line 34) | def load(self, path) -> pd.DataFrame:
method dump (line 65) | def dump(self, df: pd.DataFrame | dict, path):
class DataReducer (line 93) | class DataReducer:
method __init__ (line 96) | def __init__(self, min_frac=0.02, min_num=5):
method reduce (line 101) | def reduce(self, df: pd.DataFrame) -> pd.DataFrame:
class RandDataReducer (line 105) | class RandDataReducer(DataReducer):
method reduce (line 111) | def reduce(self, df: pd.DataFrame, frac: float = None) -> pd.DataFrame:
class FolderReducer (line 119) | class FolderReducer(DataReducer):
method reduce (line 124) | def reduce(self, array: list, frac: float = None) -> list:
class FileReducer (line 141) | class FileReducer(DataReducer):
method reduce (line 146) | def reduce(self, files: list[Path]) -> list:
class FileKeepReducer (line 158) | class FileKeepReducer(DataReducer):
method reduce (line 163) | def reduce(self, files: list[Path]) -> list:
class SingleFileReducer (line 181) | class SingleFileReducer(DataReducer):
method reduce (line 186) | def reduce(self, files: list[Path]) -> list:
class UniqueIDDataReducer (line 191) | class UniqueIDDataReducer(DataReducer):
method reduce (line 192) | def reduce(self, df: pd.DataFrame) -> pd.DataFrame:
class JsonReducer (line 237) | class JsonReducer(DataReducer):
method extract_filename (line 239) | def extract_filename(self, item: Any) -> Optional[str]:
method reduce (line 255) | def reduce(self, data: dict) -> dict:
method _find_all_lists (line 274) | def _find_all_lists(
method _sample_list (line 299) | def _sample_list(self, lst: List[Any]) -> List[Any]:
class DataSampler (line 306) | class DataSampler:
method __init__ (line 309) | def __init__(self, data_folder, sample_folder, reducer):
method sample (line 316) | def sample(self) -> None:
class DefaultSampler (line 320) | class DefaultSampler(DataSampler):
method sample (line 321) | def sample(self) -> None:
class FolderSampler (line 467) | class FolderSampler(DataSampler):
method sample (line 472) | def sample(self) -> None:
class SingleFilePerFolderSampler (line 520) | class SingleFilePerFolderSampler(DataSampler):
method sample (line 526) | def sample(self) -> None:
function copy_file (line 558) | def copy_file(src_fp, target_folder, data_folder):
function copy_folder (line 569) | def copy_folder(src_fp, target_folder, data_folder):
function count_files_in_folder (line 579) | def count_files_in_folder(files_to_process):
function map_competition (line 592) | def map_competition(competition: str) -> tuple[DataReducer, DataSampler]:
function create_debug_data (line 605) | def create_debug_data(
FILE: rdagent/scenarios/data_science/dev/feedback.py
class DSExperiment2Feedback (line 23) | class DSExperiment2Feedback(Experiment2Feedback):
method __init__ (line 24) | def __init__(self, scen: Scenario, version: str = "exp_feedback") -> N...
method generate_feedback (line 28) | def generate_feedback(self, exp: DSExperiment, trace: DSTrace) -> Expe...
FILE: rdagent/scenarios/data_science/dev/runner/__init__.py
class DSRunnerCoSTEERSettings (line 29) | class DSRunnerCoSTEERSettings(CoSTEERSettings):
class Config (line 32) | class Config:
class DSRunnerMultiProcessEvolvingStrategy (line 42) | class DSRunnerMultiProcessEvolvingStrategy(MultiProcessEvolvingStrategy):
method implement_one_task (line 44) | def implement_one_task(
method assign_code_list_to_evo (line 116) | def assign_code_list_to_evo(self, code_list: list[dict[str, str]], evo):
class DSCoSTEERRunner (line 135) | class DSCoSTEERRunner(CoSTEER):
method __init__ (line 136) | def __init__(
method get_develop_max_seconds (line 169) | def get_develop_max_seconds(self) -> int | None:
method should_use_new_evo (line 175) | def should_use_new_evo(self, base_fb: CoSTEERMultiFeedback | None, new...
method develop (line 196) | def develop(self, exp):
FILE: rdagent/scenarios/data_science/dev/runner/eval.py
class DSRunnerFeedback (line 34) | class DSRunnerFeedback(CoSTEERSingleFeedback):
method is_acceptable (line 45) | def is_acceptable(self) -> bool:
method __str__ (line 50) | def __str__(self) -> str:
class DSRunnerEvaluator (line 72) | class DSRunnerEvaluator(CoSTEEREvaluator):
method evaluate (line 74) | def evaluate(
FILE: rdagent/scenarios/data_science/example/arf-12-hours-prediction-task/sample.py
function sample_and_copy_subfolder (line 10) | def sample_and_copy_subfolder(
function copy_other_file (line 46) | def copy_other_file(source: Path, target: Path):
function create_debug_data (line 63) | def create_debug_data(
FILE: rdagent/scenarios/data_science/example/eval/arf-12-hours-prediction-task/grade.py
function prepare_for_auroc_metric (line 7) | def prepare_for_auroc_metric(submission: pd.DataFrame, answers: pd.DataF...
function grade (line 46) | def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
FILE: rdagent/scenarios/data_science/example/eval/playground-series-s4e9/grade.py
class InvalidSubmissionError (line 9) | class InvalidSubmissionError(Exception):
function prepare_for_metric (line 17) | def prepare_for_metric(submission: pd.DataFrame, answers: pd.DataFrame) ...
function grade (line 42) | def grade(submission: pd.DataFrame, answers: pd.DataFrame) -> float:
FILE: rdagent/scenarios/data_science/example/source_data/playground-series-s4e9/prepare.py
function prepare (line 7) | def prepare(raw: Path, public: Path, private: Path):
FILE: rdagent/scenarios/data_science/experiment/experiment.py
class DSExperiment (line 11) | class DSExperiment(Experiment[Task, FBWorkspace, FBWorkspace]):
method __init__ (line 12) | def __init__(self, pending_tasks_list: list, hypothesis_candidates: li...
method set_user_instructions (line 27) | def set_user_instructions(self, user_instructions: UserInstructions | ...
method is_ready_to_run (line 35) | def is_ready_to_run(self) -> bool:
method set_local_selection (line 42) | def set_local_selection(self, local_selection: tuple[int, ...]) -> None:
FILE: rdagent/scenarios/data_science/interactor/__init__.py
class DSInteractor (line 17) | class DSInteractor(Interactor[DSExperiment]):
method dump_and_wait_for_user_input (line 19) | def dump_and_wait_for_user_input(
method interact (line 32) | def interact(self, exp: DSExperiment, trace: DSTrace) -> DSExperiment:
class FBDSInteractor (line 70) | class FBDSInteractor(DSInteractor):
method dump_and_wait_for_user_input (line 71) | def dump_and_wait_for_user_input(
FILE: rdagent/scenarios/data_science/loop.py
function clean_workspace (line 43) | def clean_workspace(workspace_root: Path) -> None:
function backup_folder (line 64) | def backup_folder(path: str | Path) -> Path:
class DataScienceRDLoop (line 87) | class DataScienceRDLoop(RDLoop):
method __init__ (line 96) | def __init__(self, PROP_SETTING: BasePropSetting):
method direct_exp_gen (line 139) | async def direct_exp_gen(self, prev_out: dict[str, Any]):
method coding (line 154) | def coding(self, prev_out: dict[str, Any]):
method running (line 177) | def running(self, prev_out: dict[str, Any]):
method feedback (line 187) | def feedback(self, prev_out: dict[str, Any]) -> ExperimentFeedback:
method record (line 211) | def record(self, prev_out: dict[str, Any]):
method _check_exit_conditions_on_step (line 353) | def _check_exit_conditions_on_step(self, loop_id: Optional[int] = None...
method load (line 359) | def load(
method dump (line 373) | def dump(self, path: str | Path) -> None:
FILE: rdagent/scenarios/data_science/proposal/exp_gen/base.py
class DSHypothesis (line 13) | class DSHypothesis(Hypothesis):
method __init__ (line 14) | def __init__(
method __str__ (line 37) | def __str__(self) -> str:
class DSTrace (line 55) | class DSTrace(Trace[DataScienceScen, KnowledgeBase]):
method __init__ (line 56) | def __init__(self, scen: DataScienceScen, knowledge_base: KnowledgeBas...
method should_inject_diversity (line 66) | def should_inject_diversity(self, current_selection: tuple[int, ...] |...
method register_uncommitted_exp (line 81) | def register_uncommitted_exp(self, exp: DSExperiment, loop_id: int):
method deregister_uncommitted_exp (line 84) | def deregister_uncommitted_exp(self, loop_id: int):
method set_sota_exp_to_submit (line 88) | def set_sota_exp_to_submit(self, exp: DSExperiment) -> None:
method sub_trace_count (line 92) | def sub_trace_count(self) -> int:
method get_leaves (line 95) | def get_leaves(self) -> list[int, ...]:
method get_sibling_exps (line 114) | def get_sibling_exps(self, current_selection: tuple[int, ...] | None =...
method sync_dag_parent_and_hist (line 139) | def sync_dag_parent_and_hist(
method retrieve_search_list (line 165) | def retrieve_search_list(
method next_incomplete_component (line 194) | def next_incomplete_component(
method has_component (line 212) | def has_component(
method experiment_and_feedback_list_after_init (line 221) | def experiment_and_feedback_list_after_init(
method sota_experiment_fb (line 267) | def sota_experiment_fb(
method sota_experiment (line 287) | def sota_experiment(
method last_successful_exp (line 297) | def last_successful_exp(
method last_exp (line 312) | def last_exp(
method last_exp_fb (line 323) | def last_exp_fb(
method last_runnable_exp_fb (line 336) | def last_runnable_exp_fb(
FILE: rdagent/scenarios/data_science/proposal/exp_gen/diversity_strategy.py
class DiversityContextStrategy (line 10) | class DiversityContextStrategy(ABC):
method should_inject (line 17) | def should_inject(self, trace: DSTrace, local_selection: tuple[int, .....
class InjectAtRootStrategy (line 32) | class InjectAtRootStrategy(DiversityContextStrategy):
method should_inject (line 37) | def should_inject(self, trace: DSTrace, local_selection: tuple[int, .....
class InjectUntilSOTAGainedStrategy (line 42) | class InjectUntilSOTAGainedStrategy(DiversityContextStrategy):
method should_inject (line 48) | def should_inject(self, trace: DSTrace, local_selection: tuple[int, .....
class AlwaysInjectStrategy (line 61) | class AlwaysInjectStrategy(DiversityContextStrategy):
method should_inject (line 66) | def should_inject(self, trace: DSTrace, local_selection: tuple[int, .....
FILE: rdagent/scenarios/data_science/proposal/exp_gen/draft/draft.py
class DSDraftExpGen (line 26) | class DSDraftExpGen(ExpGen):
method _init_task_gen (line 27) | def _init_task_gen(
method gen (line 61) | def gen(
class DSDraftV2ExpGen (line 132) | class DSDraftV2ExpGen(ExpGen):
method __init__ (line 133) | def __init__(self, *args, **kwargs):
method tag_gen (line 137) | def tag_gen(self, scenario_desc: str) -> str:
method knowledge_gen (line 150) | def knowledge_gen(self) -> str:
method hypothesis_gen (line 157) | def hypothesis_gen(
method task_gen (line 183) | def task_gen(
method gen (line 241) | def gen(
FILE: rdagent/scenarios/data_science/proposal/exp_gen/idea_pool.py
class DSIdea (line 18) | class DSIdea:
method __init__ (line 19) | def __init__(self, raw_knowledge: Dict | str) -> None:
method __str__ (line 41) | def __str__(self) -> str:
method to_formatted_str (line 52) | def to_formatted_str(self) -> str:
class DSKnowledgeBase (line 56) | class DSKnowledgeBase(UndirectedGraph):
method __init__ (line 57) | def __init__(self, path: str | Path | None = None, idea_pool_json_path...
method add_idea (line 64) | def add_idea(self, idea: List[DSIdea] | DSIdea) -> None:
method build_idea_pool (line 97) | def build_idea_pool(self, idea_pool_json_path: str | Path):
method sample_ideas (line 116) | def sample_ideas(
method update_pickled_problem (line 182) | def update_pickled_problem(self, problems: Dict, pickled_problem_name:...
FILE: rdagent/scenarios/data_science/proposal/exp_gen/merge.py
class MergeExpGen (line 26) | class MergeExpGen(ExpGen):
method gen (line 27) | def gen(
class ExpGen2Hypothesis (line 99) | class ExpGen2Hypothesis(DSProposalV2ExpGen):
method hypothesis_gen (line 101) | def hypothesis_gen(
method get_exp_index (line 131) | def get_exp_index(self, trace: DSTrace) -> int:
method gen (line 148) | def gen(
class ExpGen2TraceAndMerge (line 239) | class ExpGen2TraceAndMerge(ExpGen):
method __init__ (line 241) | def __init__(self, *args, **kwargs):
method gen (line 246) | def gen(
class MergeExpGen_MultiTrace (line 275) | class MergeExpGen_MultiTrace(ExpGen):
method gen (line 276) | def gen(
class ExpGen2TraceAndMergeV2 (line 354) | class ExpGen2TraceAndMergeV2(ExpGen):
method __init__ (line 355) | def __init__(self, *args, **kwargs):
method reset_exp_gen_version (line 361) | def reset_exp_gen_version(self, version: str = "v2"):
method gen (line 370) | def gen(
class ExpGen2TraceAndMergeV3 (line 414) | class ExpGen2TraceAndMergeV3(ExpGen):
method __init__ (line 415) | def __init__(self, *args, **kwargs):
method gen (line 420) | def gen(
FILE: rdagent/scenarios/data_science/proposal/exp_gen/naive.py
class NaiveExpGen (line 14) | class NaiveExpGen(ExpGen):
method gen (line 15) | def gen(
FILE: rdagent/scenarios/data_science/proposal/exp_gen/package_info.py
function get_installed_packages (line 5) | def get_installed_packages():
function print_filtered_packages (line 9) | def print_filtered_packages(installed_packages, filtered_packages):
function get_python_packages (line 24) | def get_python_packages():
FILE: rdagent/scenarios/data_science/proposal/exp_gen/planner/__init__.py
class DSExperimentPlan (line 9) | class DSExperimentPlan(ExperimentPlan):
method __init__ (line 15) | def __init__(self):
class DSExpPlannerHandCraft (line 22) | class DSExpPlannerHandCraft(ExpPlanner[DSExperimentPlan]):
method plan (line 27) | def plan(self, trace: DSTrace) -> DSExperimentPlan:
FILE: rdagent/scenarios/data_science/proposal/exp_gen/proposal.py
function get_component (line 85) | def get_component(name: str) -> Dict[str, Any]:
class ScenarioChallengeCategory (line 98) | class ScenarioChallengeCategory(str, Enum):
class ScenarioChallengeDetail (line 103) | class ScenarioChallengeDetail(BaseModel):
class ScenarioAnalysis (line 121) | class ScenarioAnalysis(BaseModel):
class ScenarioChallenges (line 134) | class ScenarioChallenges(BaseModel):
class TraceAnalysisDetail (line 145) | class TraceAnalysisDetail(BaseModel):
class TraceAnalysis (line 155) | class TraceAnalysis(BaseModel):
class TraceChallengeDetail (line 168) | class TraceChallengeDetail(BaseModel):
class TraceChallenges (line 195) | class TraceChallenges(BaseModel):
class HypothesisComponent (line 210) | class HypothesisComponent(str, Enum):
class HypothesisEvaluationReasoningScore (line 218) | class HypothesisEvaluationReasoningScore(BaseModel):
class HypothesisEvaluation (line 225) | class HypothesisEvaluation(BaseModel):
class HypothesisDetail (line 243) | class HypothesisDetail(BaseModel):
class HypothesisSimple (line 260) | class HypothesisSimple(BaseModel):
class HypothesisList (line 267) | class HypothesisList(BaseModel):
class CodingSketch (line 276) | class CodingSketch(BaseModel):
function draft_exp_in_decomposition (line 306) | def draft_exp_in_decomposition(scen: Scenario, trace: DSTrace) -> None |...
class DSProposalV1ExpGen (line 317) | class DSProposalV1ExpGen(ExpGen):
method gen (line 318) | def gen(
class DSProposalV2ExpGen (line 492) | class DSProposalV2ExpGen(ExpGen):
method __init__ (line 493) | def __init__(self, *args, **kwargs):
method identify_scenario_problem (line 497) | def identify_scenario_problem(
method identify_feedback_problem (line 532) | def identify_feedback_problem(
method identify_problem (line 569) | def identify_problem(
method hypothesis_gen (line 609) | def hypothesis_gen(
method hypothesis_critique (line 705) | def hypothesis_critique(
method hypothesis_rewrite (line 774) | def hypothesis_rewrite(
method compute_top_scores (line 859) | def compute_top_scores(
method select_hypothesis (line 893) | def select_hypothesis(
method _cosine_similarity_matrix_torch (line 924) | def _cosine_similarity_matrix_torch(self, A, B):
method _prob_dis_torch (line 932) | def _prob_dis_torch(
method _get_path (line 987) | def _get_path(self, node, parent_nodes):
method _get_current_exp_score_list (line 995) | def _get_current_exp_score_list(self, trace, competition):
method _llm_select_extra_hypo (line 1028) | def _llm_select_extra_hypo(self, trace: DSTrace) -> list[tuple[str, fl...
method hypothesis_select_with_llm (line 1055) | def hypothesis_select_with_llm(
method hypothesis_rank (line 1158) | def hypothesis_rank(
method task_gen (line 1184) | def task_gen(
method get_all_hypotheses (line 1283) | def get_all_hypotheses(self, problem_dict: dict, hypothesis_dict: dict...
method gen (line 1300) | def gen(
FILE: rdagent/scenarios/data_science/proposal/exp_gen/router/__init__.py
class ParallelMultiTraceExpGen (line 36) | class ParallelMultiTraceExpGen(ExpGen):
method __init__ (line 45) | def __init__(self, *args, **kwargs):
method gen (line 58) | def gen(
method reset (line 67) | def reset(self) -> None:
method async_gen (line 70) | async def async_gen(self, trace: DSTrace, loop: LoopBase) -> DSExperim...
FILE: rdagent/scenarios/data_science/proposal/exp_gen/select/expand.py
class LatestCKPSelector (line 13) | class LatestCKPSelector(CheckpointSelector):
method __init__ (line 18) | def __init__(
method get_selection (line 23) | def get_selection(self, trace: Trace) -> tuple[int, ...]:
class LimitTimeCKPSelector (line 28) | class LimitTimeCKPSelector(CheckpointSelector):
method __init__ (line 33) | def __init__(
method set_time_limit (line 41) | def set_time_limit(self):
method get_selection (line 52) | def get_selection(self, trace: Trace) -> tuple[int, ...]:
class SOTAJumpCKPSelector (line 100) | class SOTAJumpCKPSelector(CheckpointSelector):
method __init__ (line 107) | def __init__(
method get_selection (line 118) | def get_selection(self, trace: Trace) -> tuple[int, ...]:
class BackJumpCKPSelector (line 156) | class BackJumpCKPSelector(CheckpointSelector):
method __init__ (line 164) | def __init__(
method get_selection (line 175) | def get_selection(self, trace: Trace) -> tuple[int, ...]:
FILE: rdagent/scenarios/data_science/proposal/exp_gen/select/submit.py
class GlobalSOTASelector (line 43) | class GlobalSOTASelector(SOTAexpSelector):
method __init__ (line 48) | def __init__(self):
method get_sota_exp_to_submit (line 51) | def get_sota_exp_to_submit(self, trace: Trace, **kwargs) -> DSExperime...
class AutoSOTAexpSelector (line 58) | class AutoSOTAexpSelector(SOTAexpSelector):
method __init__ (line 64) | def __init__(self):
method get_sota_exp_to_submit (line 68) | def get_sota_exp_to_submit(self, trace: Trace, **kwargs) -> DSExperime...
method collect_sota_candidates (line 130) | def collect_sota_candidates(self, trace: Trace) -> list:
class BestValidSelector (line 158) | class BestValidSelector(SOTAexpSelector):
method __init__ (line 164) | def __init__(self, num_candidates: int = 1, use_decision: bool = True,...
method get_sota_exp_to_submit (line 178) | def get_sota_exp_to_submit(self, trace: Trace, **kwargs) -> DSExperime...
method collect_sota_candidates (line 187) | def collect_sota_candidates(self, trace: Trace) -> list[DSExperiment] ...
class ValidationSelector (line 263) | class ValidationSelector(SOTAexpSelector):
method __init__ (line 271) | def __init__(
method get_sota_exp_to_submit (line 289) | def get_sota_exp_to_submit(self, trace: Trace) -> DSExperiment | None:
method print_code (line 345) | def print_code(self, data_py_code: str, grade_py_code: str):
method _prepare_validation_scripts (line 353) | def _prepare_validation_scripts(
method _generate_and_run_script (line 429) | def _generate_and_run_script(
function process_experiment (line 521) | def process_experiment(
function _parsing_score (line 575) | def _parsing_score(grade_stdout: str) -> Optional[float]:
function check_hit (line 602) | def check_hit(selected_exp: DSExperiment, trace: Trace, sota_result: Dic...
function try_get_loop_id (line 622) | def try_get_loop_id(trace: Trace, exp: DSExperiment):
function extract_tar (line 629) | def extract_tar(tar_path: str, to_dir: str = "log") -> str:
function evaluate_one_trace (line 639) | def evaluate_one_trace(
function select_on_existing_trace (line 744) | def select_on_existing_trace(
FILE: rdagent/scenarios/data_science/proposal/exp_gen/trace_scheduler.py
class TraceScheduler (line 18) | class TraceScheduler(ABC):
method next (line 25) | async def next(self, trace: DSTrace) -> tuple[int, ...]:
method reset (line 43) | def reset(self) -> None:
class BaseScheduler (line 50) | class BaseScheduler(TraceScheduler):
method __init__ (line 51) | def __init__(self):
method next (line 55) | async def next(self, trace: DSTrace) -> tuple[int, ...]:
method process_uncommitted_nodes (line 85) | def process_uncommitted_nodes(self, trace: DSTrace) -> None:
method select (line 93) | def select(self, trace: DSTrace) -> tuple[int, ...] | None:
method reset (line 97) | def reset(self) -> None:
class RoundRobinScheduler (line 102) | class RoundRobinScheduler(BaseScheduler):
method __init__ (line 110) | def __init__(self, max_trace_num: int, *args, **kwargs):
method select (line 116) | def select(self, trace: DSTrace) -> tuple[int, ...] | None:
class ProbabilisticScheduler (line 139) | class ProbabilisticScheduler(BaseScheduler):
method __init__ (line 145) | def __init__(self, max_trace_num: int, temperature: float = 1.0, *args...
method calculate_potential (line 160) | def calculate_potential(self, trace: DSTrace, leaf_id: int) -> float:
method _softmax_probabilities (line 174) | def _softmax_probabilities(self, potentials: list[float]) -> list[float]:
method select (line 201) | def select(self, trace: DSTrace) -> tuple[int, ...] | None:
class TraceLengthScheduler (line 232) | class TraceLengthScheduler(ProbabilisticScheduler):
method __init__ (line 240) | def __init__(self, max_trace_num: int, temperature: float = 1.0, inver...
method calculate_potential (line 253) | def calculate_potential(self, trace: DSTrace, leaf_id: int) -> float:
class SOTABasedScheduler (line 267) | class SOTABasedScheduler(ProbabilisticScheduler):
method __init__ (line 272) | def __init__(self, max_trace_num: int, temperature: float = 1.0, inver...
method calculate_potential (line 283) | def calculate_potential(self, trace: DSTrace, leaf_id: int) -> float:
class RandomScheduler (line 304) | class RandomScheduler(ProbabilisticScheduler):
method calculate_potential (line 309) | def calculate_potential(self, trace: DSTrace, leaf_id: int) -> float:
class MCTSScheduler (line 316) | class MCTSScheduler(ProbabilisticScheduler):
method __init__ (line 331) | def __init__(self, max_trace_num: int, temperature: float = 1.0, *args...
method _get_q (line 344) | def _get_q(self, node_id: int) -> float:
method _get_u (line 352) | def _get_u(self, node_id: int) -> float:
method select (line 358) | def select(self, trace: DSTrace) -> tuple[int, ...] | None:
method observe_feedback (line 396) | def observe_feedback(self, trace: DSTrace, new_idx: int) -> None:
method reset (line 420) | def reset(self) -> None:
method process_uncommitted_nodes (line 431) | def process_uncommitted_nodes(self, trace: DSTrace) -> None:
FILE: rdagent/scenarios/data_science/proposal/exp_gen/utils.py
function get_component (line 56) | def get_component(name: str) -> Dict[str, Any]:
class CodingSketch (line 69) | class CodingSketch(BaseModel):
function get_packages (line 94) | def get_packages(pkgs: list[str] | None = None) -> str:
FILE: rdagent/scenarios/data_science/scen/__init__.py
class DataScienceScen (line 27) | class DataScienceScen(Scenario):
method __init__ (line 30) | def __init__(self, competition: str) -> None:
method reanalyze_competition_description (line 70) | def reanalyze_competition_description(self):
method _get_description (line 76) | def _get_description(self):
method _get_direction (line 89) | def _get_direction(self):
method _analysis_competition_description (line 92) | def _analysis_competition_description(self):
method real_debug_timeout (line 141) | def real_debug_timeout(self):
method recommend_debug_timeout (line 152) | def recommend_debug_timeout(self):
method real_full_timeout (line 155) | def real_full_timeout(self):
method recommend_full_timeout (line 176) | def recommend_full_timeout(self):
method increase_timeout (line 179) | def increase_timeout(self):
method background (line 184) | def background(self) -> str:
method rich_style_description (line 197) | def rich_style_description(self) -> str:
method get_competition_full_desc (line 203) | def get_competition_full_desc(self) -> str:
method get_scenario_all_desc (line 220) | def get_scenario_all_desc(self, eda_output=None) -> str:
method get_runtime_environment (line 246) | def get_runtime_environment(self) -> str:
method _get_data_folder_description (line 253) | def _get_data_folder_description(self) -> str:
class KaggleScen (line 259) | class KaggleScen(DataScienceScen):
method __init__ (line 268) | def __init__(self, competition: str) -> None:
method _get_description (line 272) | def _get_description(self):
method _get_direction (line 275) | def _get_direction(self):
method rich_style_description (line 279) | def rich_style_description(self) -> str:
FILE: rdagent/scenarios/data_science/scen/utils.py
class FileTreeGenerationError (line 91) | class FileTreeGenerationError(Exception):
class MaxLinesExceededError (line 97) | class MaxLinesExceededError(FileTreeGenerationError):
class DirectoryPermissionError (line 103) | class DirectoryPermissionError(FileTreeGenerationError):
function get_file_len_size (line 109) | def get_file_len_size(f: Path) -> Tuple[int, str]:
function preview_df (line 122) | def preview_df(df: pd.DataFrame, file_name: str, simple=True, show_nan_c...
function preview_csv (line 177) | def preview_csv(p: Path, file_name: str, simple=True, show_nan_columns=F...
function preview_parquet (line 183) | def preview_parquet(p: Path, file_name: str, simple=True, show_nan_colum...
function preview_json (line 189) | def preview_json(p: Path, file_name: str):
function _walk (line 272) | def _walk(path: Path):
class FileTreeGenerator (line 292) | class FileTreeGenerator:
method __init__ (line 297) | def __init__(
method _build_lookup_set (line 322) | def _build_lookup_set(self):
method generate_tree (line 338) | def generate_tree(self, path: Union[str, Path]) -> str:
method _add_line (line 409) | def _add_line(self, text: str) -> None:
method _process_directory (line 424) | def _process_directory(self, path: Path, depth: int, prefix: str, base...
method _process_subdirectories (line 471) | def _process_subdirectories(self, dirs: List[Path], depth: int, prefix...
method _process_single_directory (line 491) | def _process_single_directory(self, d: Path, depth: int, prefix: str, ...
method _process_files (line 513) | def _process_files(self, all_files: List[Path], depth: int, prefix: st...
method _categorize_files (line 533) | def _categorize_files(self, files: List[Path]) -> Tuple[List[Path], Li...
method _get_size_str (line 549) | def _get_size_str(self, file_path: Path) -> str:
class DataFolderDescriptor (line 558) | class DataFolderDescriptor:
method __init__ (line 563) | def __init__(self, tree_generator: FileTreeGenerator = None):
method describe_folder (line 572) | def describe_folder(
method _select_files_for_preview (line 647) | def _select_files_for_preview(
function file_tree_v2 (line 689) | def file_tree_v2(path: Union[str, Path], max_lines: int = 200, priority_...
function describe_data_folder_v2 (line 695) | def describe_data_folder_v2(
FILE: rdagent/scenarios/data_science/test_eval.py
class NoTestEvalError (line 9) | class NoTestEvalError(Exception):
class TestEvalBase (line 13) | class TestEvalBase:
method eval (line 17) | def eval(self, competition: str, workspace: FBWorkspace) -> str:
method valid (line 21) | def valid(self, competition: str, workspace: FBWorkspace) -> tuple[str...
method enabled (line 25) | def enabled(self, competition) -> bool:
method get_sample_submission_name (line 29) | def get_sample_submission_name(self, competition: str) -> str:
method is_sub_enabled (line 47) | def is_sub_enabled(self, competition: str) -> bool:
class TestEval (line 62) | class TestEval(TestEvalBase):
method __init__ (line 65) | def __init__(self) -> None:
method eval (line 69) | def eval(self, competition: str, workspace: FBWorkspace) -> str:
method valid (line 84) | def valid(self, competition: str, workspace: FBWorkspace) -> tuple[str...
method enabled (line 101) | def enabled(self, competition) -> bool:
class MLETestEval (line 107) | class MLETestEval(TestEvalBase):
method __init__ (line 110) | def __init__(self) -> None:
method eval (line 117) | def eval(self, competition: str, workspace: FBWorkspace) -> str:
method valid (line 126) | def valid(self, competition: str, workspace: FBWorkspace) -> tuple[str...
method enabled (line 138) | def enabled(self, competition) -> bool:
function get_test_eval (line 142) | def get_test_eval() -> TestEvalBase:
FILE: rdagent/scenarios/finetune/benchmark/benchmark.py
function get_model_inference_config (line 49) | def get_model_inference_config(base_model_name: str, gpu_count: int) -> ...
function detect_model_type (line 94) | def detect_model_type(model_path: str) -> bool:
function run_benchmark (line 115) | def run_benchmark(
function get_benchmark_ranges (line 331) | def get_benchmark_ranges() -> tuple[str, str]:
FILE: rdagent/scenarios/finetune/benchmark/data/adaptor.py
class BenchmarkConfig (line 20) | class BenchmarkConfig:
FILE: rdagent/scenarios/finetune/benchmark/data/default.py
function _to_bool (line 22) | def _to_bool(value: Any) -> bool:
function _is_correct (line 38) | def _is_correct(sample: Dict) -> bool:
function _format_value (line 62) | def _format_value(value: Any) -> str:
function _format_prompt (line 71) | def _format_prompt(prompt: Any) -> str:
function _extract_tag_content (line 111) | def _extract_tag_content(prompt: Any, tag_name: str) -> str:
function _get_question (line 137) | def _get_question(sample: Dict, pred_entry: Dict) -> str:
function _get_gold (line 160) | def _get_gold(sample: Dict, pred_entry: Dict) -> str:
function _get_prediction (line 182) | def _get_prediction(sample: Dict, pred_entry: Dict) -> str:
function extract_error_samples (line 208) | def extract_error_samples(
FILE: rdagent/scenarios/finetune/benchmark/data/financeiq_gen.py
function download_financeiq_dataset (line 15) | def download_financeiq_dataset() -> None:
function extract_error_samples (line 55) | def extract_error_samples(results_base: Path, max_samples: int = 10) -> ...
FILE: rdagent/scenarios/finetune/benchmark/merge/merge.py
function is_blackwell_gpu (line 12) | def is_blackwell_gpu() -> bool:
function check_if_merging_needed (line 29) | def check_if_merging_needed(model_path: str | Path) -> bool:
function merge_model (line 50) | def merge_model(env, workspace_path: Path, base_model_path: str, adapter...
FILE: rdagent/scenarios/finetune/datasets/__init__.py
class DatasetConfig (line 23) | class DatasetConfig:
function _remove_eval_splits (line 35) | def _remove_eval_splits(out_dir: str) -> None:
function prepare (line 71) | def prepare(name: str, force: bool = False) -> str:
function prepare_all (line 113) | def prepare_all(force: bool = False) -> dict[str, str]:
FILE: rdagent/scenarios/finetune/datasets/chemcot/__init__.py
function normalize_rcr (line 7) | def normalize_rcr(out_dir: str) -> None:
FILE: rdagent/scenarios/finetune/datasets/financeiq/split.py
function get_split_indices (line 7) | def get_split_indices(
function split_financeiq_dataset (line 26) | def split_financeiq_dataset(data_dir: str, split: Literal["train", "test...
FILE: rdagent/scenarios/finetune/dev/feedback.py
class FTExperiment2Feedback (line 28) | class FTExperiment2Feedback(Experiment2Feedback):
method __init__ (line 31) | def __init__(self, scen: Scenario, version: str = "exp_feedback") -> N...
method generate_feedback (line 35) | def generate_feedback(
FILE: rdagent/scenarios/finetune/download/hf.py
function _ensure_parent (line 7) | def _ensure_parent(path: Path) -> None:
function _get_hf_token (line 11) | def _get_hf_token(token: Optional[str] = None) -> Optional[str]:
function download_dataset (line 21) | def download_dataset(
function download_model (line 66) | def download_model(
FILE: rdagent/scenarios/finetune/experiment/experiment.py
class FTExperiment (line 13) | class FTExperiment(Experiment[Task, FTWorkspace, FTWorkspace]):
method __init__ (line 14) | def __init__(self, sub_tasks: list[Task], *args, **kwargs) -> None:
method is_ready_to_run (line 27) | def is_ready_to_run(self) -> bool:
FILE: rdagent/scenarios/finetune/experiment/workspace.py
class FTWorkspace (line 27) | class FTWorkspace(FBWorkspace):
method __init__ (line 37) | def __init__(self, *args, **kwargs):
method run (line 48) | def run(
FILE: rdagent/scenarios/finetune/loop.py
class LLMFinetuneRDLoop (line 14) | class LLMFinetuneRDLoop(RDLoop):
method __init__ (line 20) | def __init__(self, PROP_SETTING: LLMFinetunePropSetting):
method direct_exp_gen (line 32) | async def direct_exp_gen(self, prev_out: dict[str, Any]):
method coding (line 39) | def coding(self, prev_out: dict[str, Any]):
method feedback (line 46) | def feedback(self, prev_out: dict[str, Any]):
method record (line 57) | def record(self, prev_out: dict[str, Any]):
FILE: rdagent/scenarios/finetune/proposal/proposal.py
class FTHypothesis (line 24) | class FTHypothesis(Hypothesis):
method __init__ (line 27) | def __init__(
method __str__ (line 43) | def __str__(self) -> str:
class LLMFinetuneExpGen (line 56) | class LLMFinetuneExpGen(ExpGen):
method __init__ (line 62) | def __init__(self, scen: LLMFinetuneScen):
method gen (line 65) | def gen(self, trace: Trace) -> FTExperiment:
method _gen_hypothesis (line 74) | def _gen_hypothesis(self, trace: Trace, base_model: str, parent_exp: F...
FILE: rdagent/scenarios/finetune/proposal/trace.py
class FTTrace (line 23) | class FTTrace(Trace["LLMFinetuneScen", KnowledgeBase]):
method __init__ (line 30) | def __init__(self, scen: "LLMFinetuneScen", knowledge_base: KnowledgeB...
method sota_benchmark (line 36) | def sota_benchmark(self) -> dict | None:
method get_experiment_info (line 49) | def get_experiment_info(self, exp: "FTExperiment") -> dict[str, Any]:
method sota_info (line 75) | def sota_info(self) -> dict[str, Any] | None:
FILE: rdagent/scenarios/finetune/scen/docker_scripts/extract_parameters.py
function extract_field_info (line 31) | def extract_field_info(field):
function extract_params (line 51) | def extract_params(cls):
function extract_base_params (line 56) | def extract_base_params(cls):
function save_parameters (line 73) | def save_parameters(base_dir):
function main (line 109) | def main():
FILE: rdagent/scenarios/finetune/scen/llama_factory_manager.py
class LLaMAFactoryManager (line 99) | class LLaMAFactoryManager:
method __init__ (line 102) | def __init__(self):
method extract_info_from_docker (line 107) | def extract_info_from_docker(self) -> Dict:
method _load_extracted_data (line 154) | def _load_extracted_data(self) -> Dict:
method get_info (line 172) | def get_info(self) -> Dict:
method methods (line 179) | def methods(self) -> List[str]:
method models (line 184) | def models(self) -> List[str]:
method hf_models (line 189) | def hf_models(self) -> List[str]:
method peft_methods (line 195) | def peft_methods(self) -> List[str]:
method training_stages (line 201) | def training_stages(self) -> Dict[str, str]:
method templates (line 206) | def templates(self) -> List[str]:
method is_peft_method (line 210) | def is_peft_method(self, method: str) -> bool:
method get_parameters (line 214) | def get_parameters(self, param_type: Optional[str] = None) -> Dict:
method _format_param_line (line 221) | def _format_param_line(self, param_name: str, param_info: dict, max_he...
method _format_params_dict (line 238) | def _format_params_dict(self, params_dict: dict, max_help_len: int | N...
method format_shared_params (line 246) | def format_shared_params(self, max_help_len: int | None = DEFAULT_HELP...
method format_method_specific_params (line 263) | def format_method_specific_params(self, method: str, max_help_len: int...
FILE: rdagent/scenarios/finetune/scen/memory_estimator.py
class MemoryEstimator (line 10) | class MemoryEstimator:
method __init__ (line 32) | def __init__(
method from_model_name (line 52) | def from_model_name(
method _base_memory (line 73) | def _base_memory(self, method: str) -> float:
method _activation_factor (line 87) | def _activation_factor(self, method: str) -> float:
method _find_max_seq_len (line 91) | def _find_max_seq_len(self, method: str, batch_size: int = 1) -> int:
method estimate (line 105) | def estimate(self) -> dict[str, int]:
method format (line 110) | def format(self, estimates: dict[str, int] = None) -> str:
FILE: rdagent/scenarios/finetune/scen/scenario.py
class LLMFinetuneScen (line 26) | class LLMFinetuneScen(DataScienceScen):
method __init__ (line 29) | def __init__(self) -> None:
method benchmark_hash (line 74) | def benchmark_hash(self, model_name, benchmark_name) -> str:
method run_baseline_model_evaluation (line 78) | def run_baseline_model_evaluation(self, model_name, benchmark_name) ->...
method real_full_timeout (line 112) | def real_full_timeout(self):
method _generate_memory_report (line 115) | def _generate_memory_report(self) -> str:
method _validate_and_prepare_environment (line 150) | def _validate_and_prepare_environment(self):
method _initialize_llama_factory (line 164) | def _initialize_llama_factory(self):
method _select_relevant_datasets (line 175) | def _select_relevant_datasets(self) -> list[str]:
method _llm_select_datasets (line 201) | def _llm_select_datasets(self) -> tuple[list[str], str]:
method _prepare_dataset_config (line 231) | def _prepare_dataset_config(self) -> dict:
method metric_direction (line 280) | def metric_direction(self) -> bool:
method get_scenario_all_desc (line 284) | def get_scenario_all_desc(self, enable_dataset_description: bool = Fal...
FILE: rdagent/scenarios/finetune/scen/utils.py
function _find_data_files (line 21) | def _find_data_files(dataset_path: Path, max_files: int = 50) -> list[Pa...
function _truncate_long_values (line 40) | def _truncate_long_values(obj, max_length: int = 3000):
function _compute_column_stats (line 67) | def _compute_column_stats(data: list[dict]) -> dict[str, dict]:
function _load_dataset_for_stats (line 138) | def _load_dataset_for_stats(data_files: list[Path], max_samples: int = 5...
class FinetuneDatasetDescription (line 187) | class FinetuneDatasetDescription(dict):
method __str__ (line 190) | def __str__(self) -> str:
class FinetuneFileDescription (line 216) | class FinetuneFileDescription(dict):
method __str__ (line 219) | def __str__(self) -> str:
class FinetuneDatasetDescriptor (line 230) | class FinetuneDatasetDescriptor:
method _generate_file_tree (line 233) | def _generate_file_tree(self, dataset_path: Path) -> str:
method _count_samples_in_file (line 242) | def _count_samples_in_file(self, data_file: Path) -> int:
method _generate_stats (line 275) | def _generate_stats(self, dataset_path: Path, include_column_stats: bo...
method hash_dataset_path (line 334) | def hash_dataset_path(
method describe_dataset_folder (line 348) | def describe_dataset_folder(
method get_dataset_stats (line 425) | def get_dataset_stats(self, dataset_path: Path) -> dict[str, Any]:
method _walk (line 429) | def _walk(self, dir_path: Path, depth: int, max_depth: int, target_nam...
method _read_dataset_readme (line 445) | def _read_dataset_readme(self, dataset_path: Path, max_chars: int = 50...
method _extract_samples_for_template (line 467) | def _extract_samples_for_template(self, data_files: list[Path], max_sa...
method describe_model (line 502) | def describe_model(self, base_model_name: str = None, ft_file_path: st...
method describe_file_json (line 579) | def describe_file_json(self, data_file: Path, max_samples: int = 3) ->...
method describe_file_jsonl (line 594) | def describe_file_jsonl(self, data_file: Path, max_samples: int = 3) -...
method describe_file_csv (line 616) | def describe_file_csv(self, data_file: Path, max_samples: int = 3) -> ...
method describe_file_parquet (line 634) | def describe_file_parquet(self, data_file: Path, max_samples: int = 3)...
method describe_data_file (line 652) | def describe_data_file(self, data_file: Path) -> FinetuneFileDescription:
method _discover_subtasks (line 677) | def _discover_subtasks(self, dataset_dir: Path) -> dict:
method analyze_dataset (line 712) | def analyze_dataset(self, dataset_dir: Path) -> dict:
function _read_single_dataset_readme (line 774) | def _read_single_dataset_readme(dataset_path: Path, max_chars: int = 200...
function check_all_dataset_in_info (line 830) | def check_all_dataset_in_info(ft_file_path, existing_config, max_depth: ...
function generate_dataset_info_config (line 863) | def generate_dataset_info_config(target_dataset_list: list, ft_file_path...
FILE: rdagent/scenarios/finetune/train/eval.py
function extract_loss_history (line 30) | def extract_loss_history(output_path) -> Dict[str, List[Dict[str, Any]]]:
class FTRunnerEvaluator (line 78) | class FTRunnerEvaluator(CoSTEEREvaluator):
method evaluate (line 81) | def evaluate(
method _generate_llm_feedback (line 239) | def _generate_llm_feedback(
method _run_full_data_processing (line 303) | def _run_full_data_processing(self, implementation: FBWorkspace):
FILE: rdagent/scenarios/finetune/train/runner.py
class FTRunnerSettings (line 31) | class FTRunnerSettings(FTCoderCoSTEERSettings):
class Config (line 34) | class Config:
class FTRunnerEvolvingStrategy (line 38) | class FTRunnerEvolvingStrategy(MultiProcessEvolvingStrategy):
method implement_one_task (line 45) | def implement_one_task(
class LLMFinetuneRunner (line 63) | class LLMFinetuneRunner(CoSTEER):
method __init__ (line 66) | def __init__(
method develop (line 95) | def develop(self, exp):
method get_develop_max_seconds (line 110) | def get_develop_max_seconds(self) -> int | None:
method compare_and_pick_fb (line 114) | def compare_and_pick_fb(self, base_fb, new_fb) -> bool:
FILE: rdagent/scenarios/finetune/utils.py
function ensure_ft_assets_exist (line 9) | def ensure_ft_assets_exist(
FILE: rdagent/scenarios/general_model/scenario.py
class GeneralModelScenario (line 8) | class GeneralModelScenario(Scenario):
method __init__ (line 9) | def __init__(self) -> None:
method background (line 18) | def background(self) -> str:
method source_data (line 22) | def source_data(self) -> str:
method output_format (line 26) | def output_format(self) -> str:
method interface (line 30) | def interface(self) -> str:
method simulator (line 34) | def simulator(self) -> str:
method rich_style_description (line 38) | def rich_style_description(self) -> str:
method get_scenario_all_desc (line 41) | def get_scenario_all_desc(
method get_runtime_environment (line 54) | def get_runtime_environment(self):
FILE: rdagent/scenarios/kaggle/developer/coder.py
class KGModelFeatureSelectionCoder (line 35) | class KGModelFeatureSelectionCoder(Developer[KGModelExperiment]):
method develop (line 36) | def develop(self, exp: KGModelExperiment) -> KGModelExperiment:
FILE: rdagent/scenarios/kaggle/developer/feedback.py
class KGExperiment2Feedback (line 16) | class KGExperiment2Feedback(Experiment2Feedback):
method process_results (line 17) | def process_results(self, current_result, sota_result):
method generate_feedback (line 40) | def generate_feedback(self, exp: Experiment, trace: Trace) -> Hypothes...
FILE: rdagent/scenarios/kaggle/developer/runner.py
class KGCachedRunner (line 17) | class KGCachedRunner(CachedRunner[ASpecificExp]):
method get_cache_key (line 18) | def get_cache_key(self, exp: ASpecificExp) -> str:
method assign_cached_result (line 28) | def assign_cached_result(self, exp: Experiment, cached_res: Experiment...
method init_develop (line 41) | def init_develop(self, exp: KGFactorExperiment | KGModelExperiment) ->...
class KGModelRunner (line 60) | class KGModelRunner(KGCachedRunner[KGModelExperiment]):
method develop (line 62) | def develop(self, exp: KGModelExperiment) -> KGModelExperiment:
class KGFactorRunner (line 94) | class KGFactorRunner(KGCachedRunner[KGFactorExperiment]):
method develop (line 96) | def develop(self, exp: KGFactorExperiment) -> KGFactorExperiment:
FILE: rdagent/scenarios/kaggle/experiment/kaggle_experiment.py
class KGModelExperiment (line 37) | class KGModelExperiment(ModelExperiment[ModelTask, KGFBWorkspace, ModelF...
method __init__ (line 38) | def __init__(self, *args, source_feature_size: int = None, **kwargs) -...
class KGFactorExperiment (line 61) | class KGFactorExperiment(FeatureExperiment[FactorTask, KGFBWorkspace, Fa...
method __init__ (line 62) | def __init__(self, *args, source_feature_size: int = None, **kwargs) -...
FILE: rdagent/scenarios/kaggle/experiment/scenario.py
class KGScenario (line 36) | class KGScenario(Scenario):
method __init__ (line 37) | def __init__(self, competition: str) -> None:
method _analysis_competition_description (line 73) | def _analysis_competition_description(self):
method get_competition_full_desc (line 101) | def get_competition_full_desc(self) -> str:
method background (line 114) | def background(self) -> str:
method source_data (line 134) | def source_data(self) -> str:
method output_format (line 167) | def output_format(self, tag=None) -> str:
method interface (line 182) | def interface(self, tag=None) -> str:
method simulator (line 197) | def simulator(self, tag=None) -> str:
method rich_style_description (line 215) | def rich_style_description(self) -> str:
method get_scenario_all_desc (line 242) | def get_scenario_all_desc(
FILE: rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/fea_share_preprocess.py
function prepreprocess (line 11) | def prepreprocess():
function preprocess_fit (line 31) | def preprocess_fit(X_train: pd.DataFrame):
function preprocess_transform (line 58) | def preprocess_transform(X: pd.DataFrame, preprocessor, label_encoders):
function preprocess_script (line 77) | def preprocess_script():
FILE: rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/feature/feature.py
class IdentityFeature (line 9) | class IdentityFeature:
method fit (line 10) | def fit(self, train_df: pd.DataFrame):
method transform (line 16) | def transform(self, X: pd.DataFrame):
FILE: rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/model/model_randomforest.py
function fit (line 13) | def fit(X_train: pd.DataFrame, y_train: pd.Series, X_valid: pd.DataFrame...
function predict (line 26) | def predict(model, X):
FILE: rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/model/model_xgboost.py
function fit (line 9) | def fit(X_train: pd.DataFrame, y_train: pd.DataFrame, X_valid: pd.DataFr...
function predict (line 27) | def predict(model, X):
FILE: rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/model/select_lightgbm.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/model/select_nn.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/model/select_randomforest.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/model/select_xgboost.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/spaceship-titanic_template/train.py
function compute_metrics_for_classification (line 18) | def compute_metrics_for_classification(y_true, y_pred):
function import_module_from_path (line 24) | def import_module_from_path(module_name, module_path):
FILE: rdagent/scenarios/kaggle/experiment/templates/covid19-global-forecasting-week-1/fea_share_preprocess.py
function prepreprocess (line 9) | def prepreprocess():
function preprocess_script (line 45) | def preprocess_script():
FILE: rdagent/scenarios/kaggle/experiment/templates/covid19-global-forecasting-week-1/feature/feature.py
class IdentityFeature (line 9) | class IdentityFeature:
method fit (line 10) | def fit(self, train_df: pd.DataFrame):
method transform (line 16) | def transform(self, X: pd.DataFrame):
FILE: rdagent/scenarios/kaggle/experiment/templates/covid19-global-forecasting-week-1/model/model_xgboost.py
function fit (line 5) | def fit(X_train: pd.DataFrame, y_train: pd.DataFrame, X_valid: pd.DataFr...
function predict (line 27) | def predict(models, X):
FILE: rdagent/scenarios/kaggle/experiment/templates/covid19-global-forecasting-week-1/model/select_xgboost.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/covid19-global-forecasting-week-1/train.py
function compute_rmsle (line 17) | def compute_rmsle(y_true, y_pred):
function import_module_from_path (line 22) | def import_module_from_path(module_name, module_path):
FILE: rdagent/scenarios/kaggle/experiment/templates/digit-recognizer/fea_share_preprocess.py
function prepreprocess (line 9) | def prepreprocess():
function preprocess_script (line 26) | def preprocess_script():
function clean_and_impute_data (line 54) | def clean_and_impute_data(X_train, X_valid, X_test):
FILE: rdagent/scenarios/kaggle/experiment/templates/digit-recognizer/feature/feature.py
class IdentityFeature (line 9) | class IdentityFeature:
method fit (line 10) | def fit(self, train_df: pd.DataFrame):
method transform (line 16) | def transform(self, X: pd.DataFrame):
FILE: rdagent/scenarios/kaggle/experiment/templates/digit-recognizer/model/model_nn.py
class NeuralNetwork (line 12) | class NeuralNetwork(nn.Module):
method __init__ (line 13) | def __init__(self, input_channels, num_classes):
method forward (line 23) | def forward(self, x):
function fit (line 34) | def fit(X_train: pd.DataFrame, y_train: pd.DataFrame, X_valid: pd.DataFr...
function predict (line 82) | def predict(model, X):
FILE: rdagent/scenarios/kaggle/experiment/templates/digit-recognizer/model/model_xgboost.py
function fit (line 9) | def fit(X_train, y_train, X_valid, y_valid):
function predict (line 30) | def predict(model, X):
FILE: rdagent/scenarios/kaggle/experiment/templates/digit-recognizer/model/select_lightgbm.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/digit-recognizer/model/select_nn.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/digit-recognizer/model/select_randomforest.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/digit-recognizer/model/select_xgboost.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/digit-recognizer/train.py
function compute_metrics_for_classification (line 17) | def compute_metrics_for_classification(y_true, y_pred):
function import_module_from_path (line 22) | def import_module_from_path(module_name, module_path):
FILE: rdagent/scenarios/kaggle/experiment/templates/feedback-prize-english-language-learning/fea_share_preprocess.py
function preprocess_script (line 9) | def preprocess_script():
FILE: rdagent/scenarios/kaggle/experiment/templates/feedback-prize-english-language-learning/feature/feature.py
class IdentityFeature (line 10) | class IdentityFeature:
method fit (line 11) | def fit(self, train_df: pd.DataFrame):
method transform (line 18) | def transform(self, X: pd.DataFrame):
FILE: rdagent/scenarios/kaggle/experiment/templates/feedback-prize-english-language-learning/model/model_randomforest.py
function fit (line 6) | def fit(X_train: pd.DataFrame, y_train: pd.Series, X_valid: pd.DataFrame...
function predict (line 19) | def predict(model, X):
FILE: rdagent/scenarios/kaggle/experiment/templates/feedback-prize-english-language-learning/model/model_xgboost.py
function is_sparse_df (line 10) | def is_sparse_df(df: pd.DataFrame) -> bool:
function fit (line 15) | def fit(X_train: pd.DataFrame, y_train: pd.DataFrame, X_valid: pd.DataFr...
function predict (line 30) | def predict(model, X_test):
FILE: rdagent/scenarios/kaggle/experiment/templates/feedback-prize-english-language-learning/model/select_randomforest.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/feedback-prize-english-language-learning/model/select_xgboost.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/feedback-prize-english-language-learning/train.py
function import_module_from_path (line 11) | def import_module_from_path(module_name, module_path):
function MCRMSE (line 18) | def MCRMSE(y_true, y_pred):
FILE: rdagent/scenarios/kaggle/experiment/templates/forest-cover-type-prediction/fea_share_preprocess.py
function prepreprocess (line 9) | def prepreprocess():
function preprocess_script (line 26) | def preprocess_script():
function clean_and_impute_data (line 50) | def clean_and_impute_data(X_train, X_valid, X_test):
FILE: rdagent/scenarios/kaggle/experiment/templates/forest-cover-type-prediction/feature/feature.py
class IdentityFeature (line 9) | class IdentityFeature:
method fit (line 10) | def fit(self, train_df: pd.DataFrame):
method transform (line 16) | def transform(self, X: pd.DataFrame):
FILE: rdagent/scenarios/kaggle/experiment/templates/forest-cover-type-prediction/model/model_randomforest.py
function fit (line 13) | def fit(X_train: pd.DataFrame, y_train: pd.Series, X_valid: pd.DataFrame...
function predict (line 31) | def predict(model, X):
FILE: rdagent/scenarios/kaggle/experiment/templates/forest-cover-type-prediction/model/model_xgboost.py
function fit (line 9) | def fit(X_train: pd.DataFrame, y_train: pd.DataFrame, X_valid: pd.DataFr...
function predict (line 29) | def predict(model, X):
FILE: rdagent/scenarios/kaggle/experiment/templates/forest-cover-type-prediction/model/select_lightgbm.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/forest-cover-type-prediction/model/select_nn.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/forest-cover-type-prediction/model/select_randomforest.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/forest-cover-type-prediction/model/select_xgboost.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/forest-cover-type-prediction/train.py
function compute_metrics_for_classification (line 17) | def compute_metrics_for_classification(y_true, y_pred):
function import_module_from_path (line 23) | def import_module_from_path(module_name, module_path):
FILE: rdagent/scenarios/kaggle/experiment/templates/forest-cover-type-prediction/train_past.py
function import_module_from_path (line 19) | def import_module_from_path(module_name, module_path):
FILE: rdagent/scenarios/kaggle/experiment/templates/meta_tpl_deprecated/fea_share_preprocess.py
function prepreprocess (line 11) | def prepreprocess():
function preprocess_fit (line 31) | def preprocess_fit(X_train: pd.DataFrame):
function preprocess_transform (line 63) | def preprocess_transform(X: pd.DataFrame, preprocessor):
function preprocess_script (line 83) | def preprocess_script():
FILE: rdagent/scenarios/kaggle/experiment/templates/meta_tpl_deprecated/feature/feature.py
class IdentityFeature (line 9) | class IdentityFeature:
method fit (line 10) | def fit(self, train_df: pd.DataFrame):
method transform (line 16) | def transform(self, X: pd.DataFrame):
FILE: rdagent/scenarios/kaggle/experiment/templates/meta_tpl_deprecated/model/model_nn.py
class FeatureInteractionModel (line 13) | class FeatureInteractionModel(nn.Module):
method __init__ (line 14) | def __init__(self, num_features):
method forward (line 23) | def forward(self, x):
function fit (line 32) | def fit(X_train, y_train, X_valid, y_valid):
function predict (line 67) | def predict(model, X):
FILE: rdagent/scenarios/kaggle/experiment/templates/meta_tpl_deprecated/model/model_randomforest.py
function select (line 13) | def select(X: pd.DataFrame) -> pd.DataFrame:
function fit (line 21) | def fit(X_train: pd.DataFrame, y_train: pd.Series, X_valid: pd.DataFrame...
function predict (line 43) | def predict(model, X):
FILE: rdagent/scenarios/kaggle/experiment/templates/meta_tpl_deprecated/model/model_xgboost.py
function select (line 9) | def select(X: pd.DataFrame) -> pd.DataFrame:
function fit (line 14) | def fit(X_train: pd.DataFrame, y_train: pd.DataFrame, X_valid: pd.DataFr...
function predict (line 34) | def predict(model, X):
FILE: rdagent/scenarios/kaggle/experiment/templates/meta_tpl_deprecated/train.py
function compute_metrics_for_classification (line 18) | def compute_metrics_for_classification(y_true, y_pred):
function compute_metrics_for_classification (line 24) | def compute_metrics_for_classification(y_true, y_pred):
function import_module_from_path (line 30) | def import_module_from_path(module_name, module_path):
FILE: rdagent/scenarios/kaggle/experiment/templates/new-york-city-taxi-fare-prediction/fea_share_preprocess.py
function prepreprocess (line 12) | def prepreprocess():
function preprocess_script (line 29) | def preprocess_script():
function clean_and_impute_data (line 53) | def clean_and_impute_data(X_train, X_valid, X_test):
FILE: rdagent/scenarios/kaggle/experiment/templates/new-york-city-taxi-fare-prediction/feature/feature.py
class DatetimeFeature (line 9) | class DatetimeFeature:
method fit (line 10) | def fit(self, train_df: pd.DataFrame):
method transform (line 16) | def transform(self, X: pd.DataFrame):
FILE: rdagent/scenarios/kaggle/experiment/templates/new-york-city-taxi-fare-prediction/model/model_linear.py
function fit (line 13) | def fit(X_train: pd.DataFrame, y_train: pd.Series, X_valid: pd.DataFrame...
function predict (line 31) | def predict(model, X):
FILE: rdagent/scenarios/kaggle/experiment/templates/new-york-city-taxi-fare-prediction/model/select_lightgbm.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/new-york-city-taxi-fare-prediction/model/select_linear.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/new-york-city-taxi-fare-prediction/model/select_nn.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/new-york-city-taxi-fare-prediction/model/select_randomforest.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/new-york-city-taxi-fare-prediction/model/select_xgboost.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/new-york-city-taxi-fare-prediction/train.py
function compute_metrics_for_classification (line 17) | def compute_metrics_for_classification(y_true, y_pred):
function import_module_from_path (line 23) | def import_module_from_path(module_name, module_path):
FILE: rdagent/scenarios/kaggle/experiment/templates/optiver-realized-volatility-prediction/fea_share_preprocess.py
function prepreprocess (line 12) | def prepreprocess():
function preprocess_fit (line 37) | def preprocess_fit(X_train: pd.DataFrame):
function preprocess_transform (line 62) | def preprocess_transform(X: pd.DataFrame, preprocessor, numerical_cols, ...
function preprocess_script (line 70) | def preprocess_script():
FILE: rdagent/scenarios/kaggle/experiment/templates/optiver-realized-volatility-prediction/feature/feature.py
class IdentityFeature (line 9) | class IdentityFeature:
method fit (line 10) | def fit(self, train_df: pd.DataFrame):
method transform (line 16) | def transform(self, X: pd.DataFrame):
FILE: rdagent/scenarios/kaggle/experiment/templates/optiver-realized-volatility-prediction/model/model_randomforest.py
function fit (line 7) | def fit(X_train: pd.DataFrame, y_train: pd.Series, X_valid: pd.DataFrame...
function predict (line 26) | def predict(model, X):
FILE: rdagent/scenarios/kaggle/experiment/templates/optiver-realized-volatility-prediction/model/model_xgboost.py
function fit (line 5) | def fit(X_train: pd.DataFrame, y_train: pd.DataFrame, X_valid: pd.DataFr...
function predict (line 25) | def predict(model, X):
FILE: rdagent/scenarios/kaggle/experiment/templates/optiver-realized-volatility-prediction/model/select_lightgbm.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/optiver-realized-volatility-prediction/model/select_nn.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/optiver-realized-volatility-prediction/model/select_randomforest.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/optiver-realized-volatility-prediction/model/select_xgboost.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/optiver-realized-volatility-prediction/train.py
function compute_rmspe (line 17) | def compute_rmspe(y_true, y_pred):
function import_module_from_path (line 23) | def import_module_from_path(module_name, module_path):
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e11/fea_share_preprocess.py
function preprocess_script (line 8) | def preprocess_script():
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e11/feature/feature.py
class IdentityFeature (line 9) | class IdentityFeature:
method fit (line 10) | def fit(self, train_df: pd.DataFrame):
method transform (line 16) | def transform(self, X: pd.DataFrame):
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e11/model/model_randomforest.py
function fit (line 5) | def fit(X_train: pd.DataFrame, y_train: pd.DataFrame, X_valid: pd.DataFr...
function predict (line 22) | def predict(model, X_test):
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e11/model/model_xgboost.py
function fit (line 9) | def fit(X_train: pd.DataFrame, y_train: pd.DataFrame, X_valid: pd.DataFr...
function predict (line 29) | def predict(model, X_test):
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e11/model/select_lightgbm.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e11/model/select_nn.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e11/model/select_randomforest.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e11/model/select_xgboost.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e11/train.py
function import_module_from_path (line 12) | def import_module_from_path(module_name, module_path):
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e14/fea_share_preprocess.py
function preprocess_script (line 8) | def preprocess_script():
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e14/feature/feature.py
class IdentityFeature (line 9) | class IdentityFeature:
method fit (line 10) | def fit(self, train_df: pd.DataFrame):
method transform (line 16) | def transform(self, X: pd.DataFrame):
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e14/model/model_randomforest.py
function fit (line 5) | def fit(X_train: pd.DataFrame, y_train: pd.DataFrame, X_valid: pd.DataFr...
function predict (line 22) | def predict(model, X_test):
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e14/model/model_xgboost.py
function fit (line 9) | def fit(X_train: pd.DataFrame, y_train: pd.DataFrame, X_valid: pd.DataFr...
function predict (line 29) | def predict(model, X_test):
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e14/model/select_lightgbm.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e14/model/select_nn.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e14/model/select_randomforest.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e14/model/select_xgboost.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e14/train.py
function import_module_from_path (line 12) | def import_module_from_path(module_name, module_path):
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e16/fea_share_preprocess.py
function preprocess_script (line 9) | def preprocess_script():
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e16/feature/feature.py
class IdentityFeature (line 9) | class IdentityFeature:
method fit (line 10) | def fit(self, train_df: pd.DataFrame):
method transform (line 16) | def transform(self, X: pd.DataFrame):
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e16/model/model_randomforest.py
function fit (line 5) | def fit(X_train: pd.DataFrame, y_train: pd.DataFrame, X_valid: pd.DataFr...
function predict (line 22) | def predict(model, X_test):
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e16/model/model_xgboost.py
function fit (line 9) | def fit(X_train: pd.DataFrame, y_train: pd.DataFrame, X_valid: pd.DataFr...
function predict (line 29) | def predict(model, X_test):
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e16/model/select_lightgbm.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e16/model/select_nn.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e16/model/select_randomforest.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e16/model/select_xgboost.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e16/train.py
function import_module_from_path (line 12) | def import_module_from_path(module_name, module_path):
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e26/fea_share_preprocess.py
function prepreprocess (line 11) | def prepreprocess():
function preprocess_fit (line 48) | def preprocess_fit(X_train: pd.DataFrame):
function preprocess_transform (line 67) | def preprocess_transform(X: pd.DataFrame, preprocessor):
function preprocess_script (line 80) | def preprocess_script():
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e26/feature/feature.py
class IdentityFeature (line 9) | class IdentityFeature:
method fit (line 10) | def fit(self, train_df: pd.DataFrame):
method transform (line 16) | def transform(self, X: pd.DataFrame):
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e26/model/model_randomforest.py
function fit (line 13) | def fit(X_train: pd.DataFrame, y_train: pd.Series, X_valid: pd.DataFrame...
function predict (line 26) | def predict(model, X):
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e26/model/model_xgboost.py
function fit (line 10) | def fit(X_train: pd.DataFrame, y_train: pd.DataFrame, X_valid: pd.DataFr...
function predict (line 32) | def predict(model, X):
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e26/model/select_lightgbm.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e26/model/select_nn.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e26/model/select_randomforest.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e26/model/select_xgboost.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s3e26/train.py
function compute_metrics_for_classification (line 18) | def compute_metrics_for_classification(y_true, y_pred):
function import_module_from_path (line 25) | def import_module_from_path(module_name, module_path):
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s4e5/fea_share_preprocess.py
function prepreprocess (line 10) | def prepreprocess():
function preprocess_fit (line 22) | def preprocess_fit(X_train: pd.DataFrame):
function preprocess_transform (line 38) | def preprocess_transform(X: pd.DataFrame, preprocessor, numerical_cols):
function preprocess_script (line 47) | def preprocess_script():
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s4e5/feature/feature.py
class IdentityFeature (line 9) | class IdentityFeature:
method fit (line 10) | def fit(self, train_df: pd.DataFrame):
method transform (line 16) | def transform(self, X: pd.DataFrame):
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s4e5/model/model_randomforest.py
function fit (line 7) | def fit(X_train: pd.DataFrame, y_train: pd.Series, X_valid: pd.DataFrame...
function predict (line 26) | def predict(model, X):
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s4e5/model/model_xgboost.py
function fit (line 5) | def fit(X_train: pd.DataFrame, y_train: pd.DataFrame, X_valid: pd.DataFr...
function predict (line 28) | def predict(model, X):
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s4e5/model/select_lightgbm.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s4e5/model/select_nn.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s4e5/model/select_randomforest.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s4e5/model/select_xgboost.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s4e5/train.py
function compute_r2 (line 17) | def compute_r2(y_true, y_pred):
function import_module_from_path (line 22) | def import_module_from_path(module_name, module_path):
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s4e8/fea_share_preprocess.py
function prepreprocess (line 11) | def prepreprocess():
function preprocess_fit (line 31) | def preprocess_fit(X_train: pd.DataFrame):
function preprocess_transform (line 62) | def preprocess_transform(X: pd.DataFrame, preprocessor, numerical_cols, ...
function preprocess_script (line 71) | def preprocess_script():
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s4e8/feature/feature.py
class IdentityFeature (line 9) | class IdentityFeature:
method fit (line 10) | def fit(self, train_df: pd.DataFrame):
method transform (line 16) | def transform(self, X: pd.DataFrame):
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s4e8/model/model_randomforest.py
function fit (line 13) | def fit(X_train: pd.DataFrame, y_train: pd.Series, X_valid: pd.DataFrame...
function predict (line 31) | def predict(model, X):
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s4e8/model/model_xgboost.py
function fit (line 9) | def fit(X_train: pd.DataFrame, y_train: pd.DataFrame, X_valid: pd.DataFr...
function predict (line 27) | def predict(model, X):
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s4e8/model/select_lightgbm.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s4e8/model/select_nn.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s4e8/model/select_randomforest.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s4e8/model/select_xgboost.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s4e8/train.py
function compute_metrics_for_classification (line 18) | def compute_metrics_for_classification(y_true, y_pred):
function import_module_from_path (line 24) | def import_module_from_path(module_name, module_path):
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s4e9/fea_share_preprocess.py
function prepreprocess (line 11) | def prepreprocess():
function preprocess_fit (line 23) | def preprocess_fit(X_train: pd.DataFrame):
function preprocess_transform (line 48) | def preprocess_transform(X: pd.DataFrame, preprocessor, numerical_cols, ...
function preprocess_script (line 57) | def preprocess_script():
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s4e9/feature/feature.py
class IdentityFeature (line 9) | class IdentityFeature:
method fit (line 10) | def fit(self, train_df: pd.DataFrame):
method transform (line 16) | def transform(self, X: pd.DataFrame):
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s4e9/model/model_randomforest.py
function fit (line 7) | def fit(X_train: pd.DataFrame, y_train: pd.Series, X_valid: pd.DataFrame...
function predict (line 26) | def predict(model, X):
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s4e9/model/model_xgboost.py
function fit (line 5) | def fit(X_train: pd.DataFrame, y_train: pd.DataFrame, X_valid: pd.DataFr...
function predict (line 25) | def predict(model, X):
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s4e9/model/select_lightgbm.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s4e9/model/select_nn.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s4e9/model/select_randomforest.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s4e9/model/select_xgboost.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/playground-series-s4e9/train.py
function compute_rmse (line 18) | def compute_rmse(y_true, y_pred):
function import_module_from_path (line 25) | def import_module_from_path(module_name, module_path):
FILE: rdagent/scenarios/kaggle/experiment/templates/sf-crime/fea_share_preprocess.py
function prepreprocess (line 11) | def prepreprocess():
function preprocess_fit (line 69) | def preprocess_fit(X_train: pd.DataFrame):
function preprocess_transform (line 88) | def preprocess_transform(X: pd.DataFrame, preprocessor):
function preprocess_script (line 101) | def preprocess_script():
FILE: rdagent/scenarios/kaggle/experiment/templates/sf-crime/feature/feature.py
class IdentityFeature (line 9) | class IdentityFeature:
method fit (line 10) | def fit(self, train_df: pd.DataFrame):
method transform (line 16) | def transform(self, X: pd.DataFrame):
FILE: rdagent/scenarios/kaggle/experiment/templates/sf-crime/model/model_randomforest.py
function fit (line 12) | def fit(X_train: pd.DataFrame, y_train: pd.Series, X_valid: pd.DataFrame...
function predict (line 25) | def predict(model, X):
FILE: rdagent/scenarios/kaggle/experiment/templates/sf-crime/model/model_xgboost.py
function fit (line 10) | def fit(X_train: pd.DataFrame, y_train: pd.DataFrame, X_valid: pd.DataFr...
function predict (line 32) | def predict(model, X):
FILE: rdagent/scenarios/kaggle/experiment/templates/sf-crime/model/select_lightgbm.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/sf-crime/model/select_nn.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/sf-crime/model/select_randomforest.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/sf-crime/model/select_xgboost.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/sf-crime/train.py
function compute_metrics_for_classification (line 18) | def compute_metrics_for_classification(y_true, y_pred):
function import_module_from_path (line 25) | def import_module_from_path(module_name, module_path):
FILE: rdagent/scenarios/kaggle/experiment/templates/spaceship-titanic/fea_share_preprocess.py
function prepreprocess (line 11) | def prepreprocess():
function preprocess_fit (line 31) | def preprocess_fit(X_train: pd.DataFrame):
function preprocess_transform (line 58) | def preprocess_transform(X: pd.DataFrame, preprocessor, label_encoders):
function preprocess_script (line 77) | def preprocess_script():
FILE: rdagent/scenarios/kaggle/experiment/templates/spaceship-titanic/feature/feature.py
class IdentityFeature (line 9) | class IdentityFeature:
method fit (line 10) | def fit(self, train_df: pd.DataFrame):
method transform (line 16) | def transform(self, X: pd.DataFrame):
FILE: rdagent/scenarios/kaggle/experiment/templates/spaceship-titanic/model/model_randomforest.py
function fit (line 13) | def fit(X_train: pd.DataFrame, y_train: pd.Series, X_valid: pd.DataFrame...
function predict (line 26) | def predict(model, X):
FILE: rdagent/scenarios/kaggle/experiment/templates/spaceship-titanic/model/model_xgboost.py
function fit (line 9) | def fit(X_train: pd.DataFrame, y_train: pd.DataFrame, X_valid: pd.DataFr...
function predict (line 27) | def predict(model, X):
FILE: rdagent/scenarios/kaggle/experiment/templates/spaceship-titanic/model/select_lightgbm.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/spaceship-titanic/model/select_nn.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/spaceship-titanic/model/select_randomforest.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/spaceship-titanic/model/select_xgboost.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/spaceship-titanic/train.py
function compute_metrics_for_classification (line 18) | def compute_metrics_for_classification(y_true, y_pred):
function import_module_from_path (line 24) | def import_module_from_path(module_name, module_path):
FILE: rdagent/scenarios/kaggle/experiment/templates/statoil-iceberg-classifier-challenge/fea_share_preprocess.py
function prepreprocess (line 8) | def prepreprocess():
function preprocess_script (line 53) | def preprocess_script():
FILE: rdagent/scenarios/kaggle/experiment/templates/statoil-iceberg-classifier-challenge/feature/feature.py
class IdentityFeature (line 9) | class IdentityFeature:
method fit (line 10) | def fit(self, train_df: pd.DataFrame):
method transform (line 16) | def transform(self, X: pd.DataFrame):
FILE: rdagent/scenarios/kaggle/experiment/templates/statoil-iceberg-classifier-challenge/model/model_xgboost.py
function fit (line 10) | def fit(X_train: pd.DataFrame, y_train: pd.Series, X_valid: pd.DataFrame...
function predict (line 32) | def predict(model, X):
FILE: rdagent/scenarios/kaggle/experiment/templates/statoil-iceberg-classifier-challenge/model/select_lightgbm.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/statoil-iceberg-classifier-challenge/model/select_xgboost.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/statoil-iceberg-classifier-challenge/train.py
function compute_metrics_for_classification (line 18) | def compute_metrics_for_classification(y_true, y_pred):
function import_module_from_path (line 23) | def import_module_from_path(module_name, module_path):
FILE: rdagent/scenarios/kaggle/experiment/templates/tabular-playground-series-dec-2021/fea_share_preprocess.py
function prepreprocess (line 12) | def prepreprocess():
function preprocess_script (line 29) | def preprocess_script():
function clean_and_impute_data (line 62) | def clean_and_impute_data(X_train, X_valid, X_test):
FILE: rdagent/scenarios/kaggle/experiment/templates/tabular-playground-series-dec-2021/feature/feature.py
class IdentityFeature (line 9) | class IdentityFeature:
method fit (line 10) | def fit(self, train_df: pd.DataFrame):
method transform (line 16) | def transform(self, X: pd.DataFrame):
FILE: rdagent/scenarios/kaggle/experiment/templates/tabular-playground-series-dec-2021/model/model_randomforest.py
function fit (line 13) | def fit(X_train: pd.DataFrame, y_train: pd.Series, X_valid: pd.DataFrame...
function predict (line 31) | def predict(model, X):
FILE: rdagent/scenarios/kaggle/experiment/templates/tabular-playground-series-dec-2021/model/model_xgboost.py
function fit (line 9) | def fit(X_train: pd.DataFrame, y_train: pd.DataFrame, X_valid: pd.DataFr...
function predict (line 30) | def predict(model, X):
FILE: rdagent/scenarios/kaggle/experiment/templates/tabular-playground-series-dec-2021/model/select_lightgbm.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/tabular-playground-series-dec-2021/model/select_nn.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/tabular-playground-series-dec-2021/model/select_randomforest.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/tabular-playground-series-dec-2021/model/select_xgboost.py
function select (line 4) | def select(X: pd.DataFrame) -> pd.DataFrame:
FILE: rdagent/scenarios/kaggle/experiment/templates/tabular-playground-series-dec-2021/train.py
function import_module_from_path (line 17) | def import_module_from_path(module_name, module_path):
FILE: rdagent/scenarios/kaggle/experiment/templates/tabular-playground
Condensed preview — 835 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (4,989K chars).
[
{
"path": ".bumpversion.cfg",
"chars": 98,
"preview": "[bumpversion]\ncurrent_version = 0.0.0\ncommit = True\ntag = True\n\n[bumpversion:file:pyproject.toml]\n"
},
{
"path": ".commitlintrc.js",
"chars": 920,
"preview": "module.exports = {\n extends: [\"@commitlint/config-conventional\"],\n rules: {\n // Configuration Format: [leve"
},
{
"path": ".devcontainer/Dockerfile",
"chars": 361,
"preview": "# 1. Pull down your Azure Container Registry image\nFROM rdagentappregistry.azurecr.io/rd-agent-mle:20250623\n\n# 2. (Optio"
},
{
"path": ".devcontainer/README.md",
"chars": 1620,
"preview": "# Introduction\n\n!!!!!This dev container is not for public development!!!!!!\n!!!!!Please don't use it if you are just a p"
},
{
"path": ".devcontainer/devcontainer.json",
"chars": 776,
"preview": "{\n \"name\": \"rd-agent-mle DevContainer\",\n \"build\": {\n \"dockerfile\": \"Dockerfile\",\n \"context\": \"..\"\n },\n \"worksp"
},
{
"path": ".devcontainer/env",
"chars": 945,
"preview": "# Global configs:\n\nMAX_RETRY=12000\nRETRY_WAIT_SECONDS=5\nTIMEOUT_FAIL_LIMIT=100\n\n# litellm\n# CHAT_MODEL=gpt-4o\n# CHAT_TEM"
},
{
"path": ".github/FUNDING.yml",
"chars": 25,
"preview": "github:\n - MIIC-finance\n"
},
{
"path": ".github/ISSUE_TEMPLATE/bug-report.md",
"chars": 1034,
"preview": "---\nname: \"\\U0001F41B Bug Report\"\nabout: Submit a bug report to help us improve RD-Agent\nlabels: bug\n\n---\n\n## 🐛 Bug Desc"
},
{
"path": ".github/ISSUE_TEMPLATE/documentation.md",
"chars": 201,
"preview": "---\nname: \"\\U0001F4D6 Documentation\"\nabout: Report an issue related to documentation\n\n---\n\n## 📖 Documentation\n\n<!-- Plea"
},
{
"path": ".github/ISSUE_TEMPLATE/feature-request.md",
"chars": 594,
"preview": "---\nname: \"\\U0001F31FFeature Request\"\nabout: Request for a new RD-Agent feature\nlabels: enhancement\n\n---\n\n## 🌟 Feature D"
},
{
"path": ".github/ISSUE_TEMPLATE/question.md",
"chars": 315,
"preview": "---\nname: \"❓Questions & Help\"\nabout: Have some questions? We can offer help.\nlabels: question\n\n---\n\n## ❓ Questions and H"
},
{
"path": ".github/PULL_REQUEST_TEMPLATE.md",
"chars": 2114,
"preview": "<!--- Thank you for submitting a Pull Request! In order to make our work smoother. -->\n<!--- please make sure your Pull "
},
{
"path": ".github/dependabot.yml",
"chars": 404,
"preview": "updates:\n - commit-message:\n prefix: build(actions)\n directory: /\n package-ecosystem: github-actions\n sch"
},
{
"path": ".github/workflows/ci.yml",
"chars": 1967,
"preview": "concurrency:\n cancel-in-progress: true\n group: ${{ github.workflow }}-${{ github.ref }}\njobs:\n ci:\n if: ${{ !cance"
},
{
"path": ".github/workflows/pr.yml",
"chars": 877,
"preview": "name: Lint pull request title\n\non:\n pull_request:\n types:\n - opened\n - synchronize\n - reopened\n "
},
{
"path": ".github/workflows/readthedocs-preview.yml",
"chars": 378,
"preview": "concurrency:\n cancel-in-progress: true\n group: ${{ github.workflow }}-${{ github.ref }}\njobs:\n documentation-links:\n "
},
{
"path": ".github/workflows/release.yml",
"chars": 1563,
"preview": "name: Release\non:\n push:\n branches:\n - main\npermissions:\n contents: read\njobs:\n release_and_publish:\n perm"
},
{
"path": ".gitignore",
"chars": 2377,
"preview": "# Custom\n*.swp\n.DS_Store\nPipfile\npublic\nrelease-notes.md\ntypescript*\ntmp/\n.ai/\n\n# Byte-compiled / optimized / DLL files\n"
},
{
"path": ".readthedocs.yaml",
"chars": 1496,
"preview": "# .readthedocs.yml\n# Read the Docs configuration file\n# See https://docs.readthedocs.io/en/stable/config-file/v2.html fo"
},
{
"path": ".streamlit/config.toml",
"chars": 38,
"preview": "[client]\nshowSidebarNavigation = false"
},
{
"path": "CHANGELOG.md",
"chars": 97692,
"preview": "# Changelog\n\n## [0.8.0](https://github.com/microsoft/RD-Agent/compare/v0.7.0...v0.8.0) (2025-11-03)\n\n\n### Features\n\n* ad"
},
{
"path": "CODE_OF_CONDUCT.md",
"chars": 444,
"preview": "# Microsoft Open Source Code of Conduct\n\nThis project has adopted the [Microsoft Open Source Code of Conduct](https://op"
},
{
"path": "CONTRIBUTING.md",
"chars": 1808,
"preview": "# Contributing to RD-Agent\n\nWe welcome contributions and suggestions to improve RD-Agent. Whether it's solving an issue,"
},
{
"path": "LICENSE",
"chars": 1141,
"preview": " MIT License\n\n Copyright (c) Microsoft Corporation.\n\n Permission is hereby granted, free of charge, to any pers"
},
{
"path": "Makefile",
"chars": 9528,
"preview": ".PHONY: clean deepclean install init-qlib-env dev constraints black isort mypy ruff toml-sort lint pre-commit test-run t"
},
{
"path": "README.md",
"chars": 32164,
"preview": "<h4 align=\"center\">\n <img src=\"docs/_static/logo.png\" alt=\"RA-Agent logo\" style=\"width:70%; \">\n \n <a href=\"https://rd"
},
{
"path": "SECURITY.md",
"chars": 2656,
"preview": "<!-- BEGIN MICROSOFT SECURITY.MD V0.0.9 BLOCK -->\n\n## Security\n\nMicrosoft takes the security of our software products an"
},
{
"path": "SUPPORT.md",
"chars": 1244,
"preview": "# TODO: The maintainer of this repo has not yet edited this file\r\n\r\n**REPO OWNER**: Do you want Customer Service & Suppo"
},
{
"path": "TODO.md",
"chars": 523,
"preview": "We encourage to set the TODOs in code. But some TODOs are more global.\nSo we place it here.\n\n\n- [ ] Aligning the naming "
},
{
"path": "constraints/3.10.txt",
"chars": 78,
"preview": "azure-identity==1.17.1\ndill==0.3.9\npillow==10.4.0\npsutil==6.1.0\nscipy==1.14.1\n"
},
{
"path": "constraints/3.11.txt",
"chars": 78,
"preview": "azure-identity==1.17.1\ndill==0.3.9\npillow==10.4.0\npsutil==6.1.0\nscipy==1.14.1\n"
},
{
"path": "docs/Makefile",
"chars": 633,
"preview": "# Minimal makefile for Sphinx documentation\n#\n\n# You can set these variables from the command line, and also\n# from the "
},
{
"path": "docs/_static/RD2bench.json",
"chars": 35405,
"preview": "{\r\n \"alpha053_15\": {\r\n \"description\": \"Reversal class factor, negative delta of a ratio involving close, low, and hi"
},
{
"path": "docs/api_reference.rst",
"chars": 181,
"preview": "=============\nAPI Reference\n=============\n\nHere you can find all ``RDAgent``'s interfaces.\n\n\nRD Loop\n=======\n\nResearch\n-"
},
{
"path": "docs/changelog.md",
"chars": 55,
"preview": "# Changelog\n\n## [Unreleased]\n<!-- insertion marker -->\n"
},
{
"path": "docs/conf.py",
"chars": 2226,
"preview": "# Configuration file for the Sphinx documentation builder.\n#\n# For the full list of built-in configuration values, see t"
},
{
"path": "docs/development.rst",
"chars": 1988,
"preview": "=========================\nFor Development\n=========================\n\nIf you want to try the latest version or contribute"
},
{
"path": "docs/index.rst",
"chars": 708,
"preview": ".. RDAgent documentation master file, created by\n sphinx-quickstart on Mon Jul 15 04:27:50 2024.\n You can adapt this"
},
{
"path": "docs/installation_and_configuration.rst",
"chars": 21921,
"preview": "==============================\nInstallation and Configuration\n==============================\n\nInstallation\n============\n"
},
{
"path": "docs/introduction.rst",
"chars": 1243,
"preview": "=========================\nIntroduction\n=========================\n\n\n\nIn modern industry, research and development (R&D) i"
},
{
"path": "docs/make.bat",
"chars": 799,
"preview": "@ECHO OFF\r\n\r\npushd %~dp0\r\n\r\nREM Command file for Sphinx documentation\r\n\r\nif \"%SPHINXBUILD%\" == \"\" (\r\n\tset SPHINXBUILD=sp"
},
{
"path": "docs/policy.rst",
"chars": 1473,
"preview": "======\nPolicy\n======\n\nThis project welcomes contributions and suggestions. Most contributions require you to agree to a"
},
{
"path": "docs/project_framework_introduction.rst",
"chars": 1369,
"preview": "===============================\nFramework Design & Components\n===============================\n\nFramework & Components\n=="
},
{
"path": "docs/requirements.txt",
"chars": 47,
"preview": "sphinx\nsphinx_rtd_theme\nfuro\nimportlib.metadata"
},
{
"path": "docs/research/benchmark.rst",
"chars": 4158,
"preview": "==============================\nBenchmark\n==============================\n\nIntroduction\n=============\n\nBenchmarking the ca"
},
{
"path": "docs/research/catalog.rst",
"chars": 1344,
"preview": "===========\nResearch\n===========\n\nTo achieve the good effects and improve R&D capabilities, we face multiple challenges,"
},
{
"path": "docs/research/dev.rst",
"chars": 1613,
"preview": "==============================\nDevelopment\n==============================\n\n\nRelated Paper\n-------------\n\n- `Collaborativ"
},
{
"path": "docs/scens/catalog.rst",
"chars": 1283,
"preview": "=========================\nScenarios\n=========================\n\nScenario lists\n=========================\n\nIn the two key "
},
{
"path": "docs/scens/data_agent_fin.rst",
"chars": 5718,
"preview": ".. _data_agent_fin:\n\n=====================\nFinance Data Agent\n=====================\n\n\n**🤖 Automated Quantitative Trading"
},
{
"path": "docs/scens/data_copilot_fin.rst",
"chars": 6679,
"preview": ".. _data_copilot_fin:\n\n=====================\nFinance Data Copilot\n=====================\n\n\n**🤖 Automated Quantitative Tra"
},
{
"path": "docs/scens/data_science.rst",
"chars": 26898,
"preview": ".. _data_science_agent:\n\n=======================\nData Science Agent\n=======================\n\n**🤖 Automated Feature Engin"
},
{
"path": "docs/scens/finetune.rst",
"chars": 7007,
"preview": ".. _finetune_agent:\n\n=============================\nFine-tuning an Existing Model\n=============================\n\n## **🎯 S"
},
{
"path": "docs/scens/model_agent_fin.rst",
"chars": 6541,
"preview": ".. _model_agent_fin:\n\n=======================\nFinance Model Agent\n=======================\n\n**🤖 Automated Quantitative Tr"
},
{
"path": "docs/scens/model_copilot_general.rst",
"chars": 4031,
"preview": ".. _model_copilot_general:\n\n======================\nGeneral Model Copilot\n======================\n\n**🤖 Automated Model Res"
},
{
"path": "docs/scens/quant_agent_fin.rst",
"chars": 5197,
"preview": ".. _quant_agent_fin:\n\n=====================\nFinance Quant Agent\n=====================\n\n\n**🥇The First Data-Centric Quant "
},
{
"path": "docs/ui.rst",
"chars": 1638,
"preview": "==============\nUser Interface\n==============\n\n\nIntroduction\n============\n\nRD-Agent will generate some logs during the R&"
},
{
"path": "pyproject.toml",
"chars": 2612,
"preview": "[build-system]\nbuild-backend = \"setuptools.build_meta\"\nrequires = [\n \"setuptools\",\n \"setuptools-scm\",\n]\n\n[project]\naut"
},
{
"path": "rdagent/app/CI/README.md",
"chars": 852,
"preview": "# CI 检查\n\n`.github/workflows/ci.yml`配置了提交时自动运行`Makefile`: 91~103行的命令,可以在这调整执行的命令\n\n在`.env`中设置`USE_CHAT_CACHE=True`可以让第二次修复"
},
{
"path": "rdagent/app/CI/ci.ipynb",
"chars": 357,
"preview": "{\n \"cells\": [\n {\n \"cell_type\": \"code\",\n \"execution_count\": null,\n \"metadata\": {},\n \"outputs\": [],\n \"source\": "
},
{
"path": "rdagent/app/CI/prompts.yaml",
"chars": 5078,
"preview": "generate_lint_command_template: |\n Please generate a command to lint or format a {language} repository.\n Here are some"
},
{
"path": "rdagent/app/CI/run.py",
"chars": 33019,
"preview": "from __future__ import annotations\n\nimport datetime\nimport json\nimport re\nimport shlex\nimport subprocess\nimport time\nfro"
},
{
"path": "rdagent/app/benchmark/factor/analysis.py",
"chars": 7724,
"preview": "import json\nimport pickle\nfrom pathlib import Path\n\nimport fire\nimport matplotlib.pyplot as plt\nimport numpy as np\nimpor"
},
{
"path": "rdagent/app/benchmark/factor/eval.py",
"chars": 1255,
"preview": "from rdagent.app.qlib_rd_loop.conf import FACTOR_PROP_SETTING\nfrom rdagent.components.benchmark.conf import BenchmarkSet"
},
{
"path": "rdagent/app/benchmark/model/README.md",
"chars": 707,
"preview": "# Tasks\n\n## Task Extraction\nFrom paper to task.\n```bash\n# python rdagent/app/model_implementation/task_extraction.py\n# I"
},
{
"path": "rdagent/app/benchmark/model/eval.py",
"chars": 1526,
"preview": "from pathlib import Path\n\nfrom rdagent.components.coder.model_coder import ModelCoSTEER\nfrom rdagent.components.loader.t"
},
{
"path": "rdagent/app/cli.py",
"chars": 5990,
"preview": "\"\"\"\nCLI entrance for all rdagent application.\n\nThis will\n- make rdagent a nice entry and\n- autoamtically load dotenv\n\"\"\""
},
{
"path": "rdagent/app/data_science/conf.py",
"chars": 8431,
"preview": "from pathlib import Path\nfrom typing import Literal\n\nfrom pydantic_settings import SettingsConfigDict\n\nfrom rdagent.app."
},
{
"path": "rdagent/app/data_science/debug.py",
"chars": 146,
"preview": "import fire\n\nfrom rdagent.scenarios.data_science.debug.data import create_debug_data\n\nif __name__ == \"__main__\":\n fir"
},
{
"path": "rdagent/app/data_science/loop.py",
"chars": 3429,
"preview": "import asyncio\nfrom pathlib import Path\nfrom typing import Optional\n\nimport fire\n\nfrom rdagent.app.data_science.conf imp"
},
{
"path": "rdagent/app/finetune/data_science/conf.py",
"chars": 1698,
"preview": "import os\n\nfrom pydantic_settings import SettingsConfigDict\n\nfrom rdagent.app.data_science.conf import DS_RD_SETTING\nfro"
},
{
"path": "rdagent/app/finetune/data_science/loop.py",
"chars": 1211,
"preview": "import asyncio\nfrom pathlib import Path\n\nimport fire\n\nfrom rdagent.app.data_science.conf import DS_RD_SETTING\nfrom rdage"
},
{
"path": "rdagent/app/finetune/data_science/scen.py",
"chars": 757,
"preview": "from pathlib import Path\n\nfrom rdagent.app.data_science.conf import DS_RD_SETTING\nfrom rdagent.core.scenario import Scen"
},
{
"path": "rdagent/app/finetune/data_science/tpl/components/coder/data_science/pipeline/prompts.yaml",
"chars": 304,
"preview": "pipeline_coder:\n system: |-\n {% include \"rdagent.components.coder.data_science.pipeline.prompts:pipeline_coder.syste"
},
{
"path": "rdagent/app/finetune/data_science/tpl/scenarios/data_science/proposal/exp_gen/prompts_v2.yaml",
"chars": 270,
"preview": "task_gen:\n system: |-\n {% include \"rdagent.scenarios.data_science.proposal.exp_gen.prompts_v2:task_gen.system\" %}\n "
},
{
"path": "rdagent/app/finetune/llm/README.md",
"chars": 5767,
"preview": "# LLM Fine-tuning (FT) 场景运行指南\n\n本文档介绍如何运行 RD-Agent 的 LLM Fine-tuning 场景。\n\n## 简介\n\nFT 场景用于自动化优化大语言模型在特定 benchmark 上的表现。系统会自"
},
{
"path": "rdagent/app/finetune/llm/conf.py",
"chars": 5207,
"preview": "from pathlib import Path\n\nfrom pydantic_settings import SettingsConfigDict\n\nfrom rdagent.core.conf import ExtendedBaseSe"
},
{
"path": "rdagent/app/finetune/llm/job/README.md",
"chars": 1932,
"preview": "# FT Job Runner\n\n批量并行运行多个 LLM 微调任务的脚本。\n\n## 快速开始\n\n```bash\n# 1. 准备环境配置\ncp .env.template .env\n# 编辑 .env,填入 API key 等配置\n\n# 2"
},
{
"path": "rdagent/app/finetune/llm/job/run_ft_job.sh",
"chars": 6937,
"preview": "#!/bin/bash\n# Run multiple FT tasks in parallel under a single job directory\n#\n# Usage: ./run_ft_job.sh [tasks.json]\n#\n#"
},
{
"path": "rdagent/app/finetune/llm/job/scenarios.json",
"chars": 17543,
"preview": "{\n \"_comment\": \"Benchmark scenarios for FT tasks. Used by run_ft_job.sh and UI.\",\n\n \"aime24\": {\n \"category\": \"math\""
},
{
"path": "rdagent/app/finetune/llm/job/tasks.json.example",
"chars": 384,
"preview": "{\n \"tasks\": [\n {\n \"model\": \"Qwen/Qwen3-8B\",\n \"benchmark\": \"aime25\",\n \"gpus\": \"0,1\"\n },\n {\n "
},
{
"path": "rdagent/app/finetune/llm/loop.py",
"chars": 4124,
"preview": "\"\"\"\nLLM Fine-tuning Entry Point\n\nStandard RDLoop entry point for LLM fine-tuning, consistent with data science implement"
},
{
"path": "rdagent/app/finetune/llm/ui/__init__.py",
"chars": 29,
"preview": "# FT (Fine-tune) scenario UI\n"
},
{
"path": "rdagent/app/finetune/llm/ui/app.py",
"chars": 7625,
"preview": "\"\"\"\nFT (Fine-tune) Timeline Viewer\nHierarchical view: Session > Loop > Stage > EvoLoop > Events\n\nRun:\n streamlit run "
},
{
"path": "rdagent/app/finetune/llm/ui/benchmarks/__init__.py",
"chars": 2198,
"preview": "\"\"\"Benchmark processors for core metric extraction.\n\nEach benchmark has its own processor that knows how to extract\nthe "
},
{
"path": "rdagent/app/finetune/llm/ui/benchmarks/base.py",
"chars": 1670,
"preview": "\"\"\"Base class for benchmark core metric extraction.\"\"\"\n\nfrom abc import ABC, abstractmethod\n\n\nclass BenchmarkProcessor(A"
},
{
"path": "rdagent/app/finetune/llm/ui/benchmarks/bioprobench.py",
"chars": 1965,
"preview": "\"\"\"BioProBench benchmark processor.\"\"\"\n\nfrom .base import BenchmarkProcessor\n\n\nclass BioProBenchProcessor(BenchmarkProce"
},
{
"path": "rdagent/app/finetune/llm/ui/benchmarks/chemcotbench.py",
"chars": 3796,
"preview": "\"\"\"ChemCotBench benchmark processor.\"\"\"\n\nfrom .base import BenchmarkProcessor\n\n\nclass ChemCotBenchProcessor(BenchmarkPro"
},
{
"path": "rdagent/app/finetune/llm/ui/benchmarks/financeiq.py",
"chars": 914,
"preview": "\"\"\"FinanceIQ benchmark processor.\"\"\"\n\nfrom .base import BenchmarkProcessor\n\n\nclass FinanceIQProcessor(BenchmarkProcessor"
},
{
"path": "rdagent/app/finetune/llm/ui/benchmarks/panorama.py",
"chars": 1550,
"preview": "\"\"\"Panorama benchmark processor.\"\"\"\n\nfrom .base import BenchmarkProcessor\n\n\nclass PanoramaProcessor(BenchmarkProcessor):"
},
{
"path": "rdagent/app/finetune/llm/ui/benchmarks/tablebench.py",
"chars": 1816,
"preview": "\"\"\"TableBench benchmark processor.\"\"\"\n\nfrom .base import BenchmarkProcessor\n\n\nclass TableBenchProcessor(BenchmarkProcess"
},
{
"path": "rdagent/app/finetune/llm/ui/components.py",
"chars": 27850,
"preview": "\"\"\"\nFT UI Components - Hierarchical Event Renderers\n\"\"\"\n\nimport re\nfrom pathlib import Path\nfrom typing import Any\n\nimpo"
},
{
"path": "rdagent/app/finetune/llm/ui/config.py",
"chars": 1457,
"preview": "\"\"\"\nFT UI Configuration Constants\n\nCentralized configuration for FT Timeline Viewer.\n\"\"\"\n\nfrom typing import Literal\n\n# "
},
{
"path": "rdagent/app/finetune/llm/ui/data_loader.py",
"chars": 15846,
"preview": "\"\"\"\nFT UI Data Loader\nLoad pkl logs and convert to hierarchical timeline structure\n\"\"\"\n\nimport re\nfrom dataclasses impor"
},
{
"path": "rdagent/app/finetune/llm/ui/ft_summary.py",
"chars": 22187,
"preview": "\"\"\"\nFT Job Summary View\nDisplay summary table for all tasks in a job directory\n\"\"\"\n\nimport pickle\nfrom pathlib import Pa"
},
{
"path": "rdagent/app/finetune/share/eval.py",
"chars": 1971,
"preview": "from pathlib import Path\n\nfrom rdagent.components.coder.CoSTEER.evaluators import (\n CoSTEEREvaluator,\n CoSTEERSin"
},
{
"path": "rdagent/app/finetune/share/prompts.yaml",
"chars": 1146,
"preview": "prev_model_eval:\n system: |-\n You are a data scientist tasked with evaluating code generation. \n\n You will receiv"
},
{
"path": "rdagent/app/general_model/general_model.py",
"chars": 1572,
"preview": "import fire\n\nfrom rdagent.components.coder.model_coder.task_loader import (\n ModelExperimentLoaderFromPDFfiles,\n)\nfro"
},
{
"path": "rdagent/app/kaggle/conf.py",
"chars": 3714,
"preview": "from pydantic_settings import SettingsConfigDict\n\nfrom rdagent.core.conf import ExtendedBaseSettings\n\n\nclass KaggleBaseP"
},
{
"path": "rdagent/app/kaggle/loop.py",
"chars": 6454,
"preview": "import subprocess\nfrom typing import Any\n\nimport fire\n\nfrom rdagent.app.kaggle.conf import KAGGLE_IMPLEMENT_SETTING\nfrom"
},
{
"path": "rdagent/app/qlib_rd_loop/conf.py",
"chars": 6166,
"preview": "from typing import Optional\n\nfrom pydantic_settings import SettingsConfigDict\n\nfrom rdagent.components.workflow.conf imp"
},
{
"path": "rdagent/app/qlib_rd_loop/factor.py",
"chars": 1999,
"preview": "\"\"\"\nFactor workflow with session control\n\"\"\"\n\nimport asyncio\nfrom pathlib import Path\nfrom typing import Any, Optional\n\n"
},
{
"path": "rdagent/app/qlib_rd_loop/factor_from_report.py",
"chars": 6894,
"preview": "import asyncio\nimport json\nfrom pathlib import Path\nfrom typing import Any, Dict, Tuple\n\nimport fire\n\nfrom rdagent.app.q"
},
{
"path": "rdagent/app/qlib_rd_loop/model.py",
"chars": 1350,
"preview": "\"\"\"\nModel workflow with session control\n\"\"\"\n\nimport asyncio\n\nimport fire\n\nfrom rdagent.app.qlib_rd_loop.conf import MODE"
},
{
"path": "rdagent/app/qlib_rd_loop/prompts.yaml",
"chars": 612,
"preview": "hypothesis_generation:\n system: |-\n You are an expert in financial analysis. Your task is to generate a well-reasone"
},
{
"path": "rdagent/app/qlib_rd_loop/quant.py",
"chars": 6856,
"preview": "\"\"\"\nQuant (Factor & Model) workflow with session control\n\"\"\"\n\nimport asyncio\nfrom typing import Any\n\nimport fire\n\nfrom r"
},
{
"path": "rdagent/app/rl/conf.py",
"chars": 1508,
"preview": "from pathlib import Path\n\nfrom pydantic_settings import SettingsConfigDict\n\nfrom rdagent.core.conf import ExtendedBaseSe"
},
{
"path": "rdagent/app/rl/loop.py",
"chars": 1916,
"preview": "\"\"\"\nRL Post-training Entry Point\n\"\"\"\n\nimport asyncio\nfrom typing import Optional\n\nimport typer\nfrom typing_extensions im"
},
{
"path": "rdagent/app/rl/ui/__init__.py",
"chars": 26,
"preview": "\"\"\"RL Post-training UI\"\"\"\n"
},
{
"path": "rdagent/app/rl/ui/app.py",
"chars": 6040,
"preview": "\"\"\"\nRL Post-training Timeline Viewer\nHierarchical view: Session > Loop > Stage > Events\n\nRun:\n streamlit run rdagent/"
},
{
"path": "rdagent/app/rl/ui/components.py",
"chars": 10381,
"preview": "\"\"\"\nRL UI Components - Event Renderers\nSimplified version without EvoLoop\n\"\"\"\n\nfrom typing import Any\n\nimport streamlit "
},
{
"path": "rdagent/app/rl/ui/config.py",
"chars": 952,
"preview": "\"\"\"\nRL UI Configuration Constants\n\"\"\"\n\nfrom typing import Literal\n\n# Event type definition\nEventType = Literal[\n \"sce"
},
{
"path": "rdagent/app/rl/ui/data_loader.py",
"chars": 9126,
"preview": "\"\"\"\nRL UI Data Loader\nLoad pkl logs and convert to hierarchical timeline structure\nSimplified version: no EvoLoop (RL do"
},
{
"path": "rdagent/app/rl/ui/rl_summary.py",
"chars": 6041,
"preview": "\"\"\"\nRL Job Summary View\nDisplay summary table for all tasks in a job directory\n\"\"\"\n\nimport pickle\nfrom pathlib import Pa"
},
{
"path": "rdagent/app/utils/ape.py",
"chars": 1270,
"preview": "\"\"\"\nThis is the preliminary version of the APE (Automated Prompt Engineering)\n\"\"\"\n\nimport pickle\nfrom pathlib import Pat"
},
{
"path": "rdagent/app/utils/health_check.py",
"chars": 5842,
"preview": "import os\nimport socket\n\nimport docker\nimport fire\nimport litellm\nfrom litellm import completion, embedding\nfrom litellm"
},
{
"path": "rdagent/app/utils/info.py",
"chars": 3151,
"preview": "import importlib.metadata\nimport platform\nimport sys\nfrom pathlib import Path\n\nimport docker\nimport requests\nfrom packag"
},
{
"path": "rdagent/app/utils/prompts.yaml",
"chars": 5140,
"preview": "ape:\n system: |-\n We'll provide you with a pair of Chat QA about data science.\n We are creating solutions for a K"
},
{
"path": "rdagent/app/utils/ws.py",
"chars": 1833,
"preview": "from typing import Optional\n\nimport typer\n\nfrom rdagent.app.data_science.conf import DS_RD_SETTING\nfrom rdagent.componen"
},
{
"path": "rdagent/app/utils/ws_ft.py",
"chars": 1568,
"preview": "from typing import Optional\n\nimport typer\n\nfrom rdagent.app.finetune.llm.conf import FT_RD_SETTING\nfrom rdagent.componen"
},
{
"path": "rdagent/components/agent/__init__.py",
"chars": 66,
"preview": "\"\"\"\nSome agent that can be shared across different scenarios.\n\"\"\"\n"
},
{
"path": "rdagent/components/agent/base.py",
"chars": 2255,
"preview": "from abc import abstractmethod\n\nimport nest_asyncio\nfrom prefect import task\nfrom prefect.cache_policies import INPUTS\nf"
},
{
"path": "rdagent/components/agent/context7/__init__.py",
"chars": 1943,
"preview": "from typing import Optional\n\nfrom pydantic_ai.mcp import MCPServerStreamableHTTP\n\nfrom rdagent.components.agent.base imp"
},
{
"path": "rdagent/components/agent/context7/conf.py",
"chars": 801,
"preview": "\"\"\"\nThe context7 is based on a modified version of the context7.\n\nYou can follow the instructions to install it\n\n mkd"
},
{
"path": "rdagent/components/agent/context7/prompts.yaml",
"chars": 3154,
"preview": "# Context7 MCP Enhanced Query Prompts\n\nsystem_prompt: |-\n You are a helpful assistant.\n You help to user to search doc"
},
{
"path": "rdagent/components/agent/mcp/__init__.py",
"chars": 433,
"preview": "\"\"\"\nHere are a list of MCP servers.\n\nThe MCP server is a individual RESTful API. So the only following things are includ"
},
{
"path": "rdagent/components/agent/rag/__init__.py",
"chars": 685,
"preview": "from pydantic_ai.mcp import MCPServerStreamableHTTP\n\nfrom rdagent.components.agent.base import PAIAgent\nfrom rdagent.com"
},
{
"path": "rdagent/components/agent/rag/conf.py",
"chars": 418,
"preview": "\"\"\"\nSettings for RAG agent.\n\nTODO: how run the RAG mcp server\n\"\"\"\n\nfrom pydantic_settings import BaseSettings, SettingsC"
},
{
"path": "rdagent/components/benchmark/__init__.py",
"chars": 139,
"preview": "\"\"\"Shared benchmark evaluation utilities.\"\"\"\n\nfrom pathlib import Path\n\n# 共享配置目录\nBENCHMARK_CONFIGS_DIR = Path(__file__)."
},
{
"path": "rdagent/components/benchmark/conf.py",
"chars": 986,
"preview": "from dataclasses import field\nfrom pathlib import Path\nfrom typing import Optional\n\nfrom rdagent.core.conf import Extend"
},
{
"path": "rdagent/components/benchmark/configs/__init__.py",
"chars": 51,
"preview": "\"\"\"Shared OpenCompass benchmark configurations.\"\"\"\n"
},
{
"path": "rdagent/components/benchmark/configs/models.yaml",
"chars": 2628,
"preview": "# Model Inference Parameters Configuration\n# Used by benchmark.py to determine inference settings for different models\n\n"
},
{
"path": "rdagent/components/benchmark/configs/opencompass_template.yaml",
"chars": 3998,
"preview": "# Auto-generated OpenCompass Config for RD-Agent Benchmark\n# DO NOT EDIT MANUALLY - Generated by benchmark.py\n\ntemplate:"
},
{
"path": "rdagent/components/benchmark/eval_method.py",
"chars": 7712,
"preview": "from collections import defaultdict\nfrom pathlib import Path\nfrom typing import Dict, List, Tuple, Union\n\nimport pandas "
},
{
"path": "rdagent/components/benchmark/example.json",
"chars": 4318,
"preview": "{\n \"Turnover_Rate_Factor\": {\n \"description\": \"A traditional factor based on 20-day average turnover rate, adju"
},
{
"path": "rdagent/components/benchmark/utils.py",
"chars": 2598,
"preview": "\"\"\"Utilities shared by benchmark evaluators.\"\"\"\n\nfrom __future__ import annotations\n\nimport importlib\nimport logging\nimp"
},
{
"path": "rdagent/components/coder/CoSTEER/__init__.py",
"chars": 7284,
"preview": "from copy import deepcopy\nfrom datetime import datetime\nfrom pathlib import Path\n\nfrom rdagent.components.coder.CoSTEER."
},
{
"path": "rdagent/components/coder/CoSTEER/config.py",
"chars": 1142,
"preview": "from typing import Union\n\nfrom rdagent.core.conf import ExtendedBaseSettings\n\n\nclass CoSTEERSettings(ExtendedBaseSetting"
},
{
"path": "rdagent/components/coder/CoSTEER/evaluators.py",
"chars": 13957,
"preview": "import json\nfrom abc import abstractmethod\nfrom copy import deepcopy\nfrom dataclasses import dataclass, field\nfrom typin"
},
{
"path": "rdagent/components/coder/CoSTEER/evolvable_subjects.py",
"chars": 1158,
"preview": "from rdagent.core.evolving_framework import EvolvableSubjects\nfrom rdagent.core.experiment import Experiment, FBWorkspac"
},
{
"path": "rdagent/components/coder/CoSTEER/evolving_strategy.py",
"chars": 7268,
"preview": "from __future__ import annotations\n\nfrom abc import abstractmethod\nfrom typing import Callable, Generator\n\nfrom rdagent."
},
{
"path": "rdagent/components/coder/CoSTEER/knowledge_management.py",
"chars": 49156,
"preview": "from __future__ import annotations\n\nimport copy\nimport json\nimport pickle\nimport random\nimport re\nfrom itertools import "
},
{
"path": "rdagent/components/coder/CoSTEER/prompts.yaml",
"chars": 498,
"preview": "\nanalyze_component_prompt_v1_system: |-\n User is getting a new task that might consist of the components below (given i"
},
{
"path": "rdagent/components/coder/CoSTEER/task.py",
"chars": 430,
"preview": "from rdagent.core.experiment import Task\n\n\nclass CoSTEERTask(Task):\n def __init__(self, base_code: str = None, *args,"
},
{
"path": "rdagent/components/coder/data_science/conf.py",
"chars": 2764,
"preview": "from typing import Literal\n\nfrom rdagent.app.data_science.conf import DS_RD_SETTING\nfrom rdagent.components.coder.CoSTEE"
},
{
"path": "rdagent/components/coder/data_science/ensemble/__init__.py",
"chars": 6221,
"preview": "\"\"\"\nFile structure\n- ___init__.py: the entrance/agent of coder\n- evaluator.py\n- conf.py\n- exp.py: everything under the e"
},
{
"path": "rdagent/components/coder/data_science/ensemble/conf.py",
"chars": 101,
"preview": "# Configuration file for ensemble component\n# Currently empty as no specific configuration is needed\n"
},
{
"path": "rdagent/components/coder/data_science/ensemble/eval.py",
"chars": 3914,
"preview": "import json\nimport re\nfrom pathlib import Path\n\nfrom jinja2 import Environment, StrictUndefined\n\nfrom rdagent.app.data_s"
},
{
"path": "rdagent/components/coder/data_science/ensemble/eval_tests/ensemble_test.txt",
"chars": 5154,
"preview": "\"\"\"\nTests for `ensemble_workflow` in ensemble.py\n\nA qualified ensemble_workflow implementation should:\n- Return predicti"
},
{
"path": "rdagent/components/coder/data_science/ensemble/exp.py",
"chars": 400,
"preview": "import pickle\nimport site\nimport traceback\nfrom pathlib import Path\nfrom typing import Dict, Optional\n\nfrom rdagent.comp"
},
{
"path": "rdagent/components/coder/data_science/ensemble/prompts.yaml",
"chars": 4991,
"preview": "ensemble_coder:\n system: |-\n You are a world-class data scientist and machine learning engineer with deep expertise "
},
{
"path": "rdagent/components/coder/data_science/ensemble/test.py",
"chars": 1679,
"preview": "\"\"\"\nHelper functions for testing the ensemble coder(CoSTEER-based) component.\n\"\"\"\n\nimport sys\nfrom pathlib import Path\n\n"
},
{
"path": "rdagent/components/coder/data_science/feature/__init__.py",
"chars": 5574,
"preview": "from pathlib import Path\n\nfrom rdagent.app.data_science.conf import DS_RD_SETTING\nfrom rdagent.components.coder.CoSTEER."
},
{
"path": "rdagent/components/coder/data_science/feature/eval.py",
"chars": 3284,
"preview": "from pathlib import Path\n\nfrom rdagent.components.coder.CoSTEER.evaluators import (\n CoSTEEREvaluator,\n CoSTEERSin"
},
{
"path": "rdagent/components/coder/data_science/feature/eval_tests/feature_test.txt",
"chars": 4036,
"preview": "\"\"\"\nTests for `feat_eng` in feature.py\n\"\"\"\n\n\nfrom copy import deepcopy\nimport sys\nimport numpy as np\nimport pandas as pd"
},
{
"path": "rdagent/components/coder/data_science/feature/exp.py",
"chars": 399,
"preview": "import pickle\nimport site\nimport traceback\nfrom pathlib import Path\nfrom typing import Dict, Optional\n\nfrom rdagent.comp"
},
{
"path": "rdagent/components/coder/data_science/feature/prompts.yaml",
"chars": 5751,
"preview": "feature_coder:\n system: |-\n You are a world-class data scientist and machine learning engineer with deep expertise i"
},
{
"path": "rdagent/components/coder/data_science/feature/test.py",
"chars": 1372,
"preview": "\"\"\"\nHelper functions for testing the feature coder(CoSTEER-based) component.\n- Does the developer loop work correctly\n\nI"
},
{
"path": "rdagent/components/coder/data_science/model/__init__.py",
"chars": 7189,
"preview": "from pathlib import Path\n\nfrom rdagent.app.data_science.conf import DS_RD_SETTING\nfrom rdagent.components.coder.CoSTEER."
},
{
"path": "rdagent/components/coder/data_science/model/eval.py",
"chars": 4980,
"preview": "\"\"\"\nBeyond previous tests\n-\n\"\"\"\n\nimport json\nimport re\nfrom pathlib import Path\n\nfrom rdagent.app.data_science.conf impo"
},
{
"path": "rdagent/components/coder/data_science/model/eval_tests/model_test.txt",
"chars": 3571,
"preview": "\"\"\"\nTests for `model_workflow` in model01.py\n\"\"\"\nimport sys\nimport time\n\nfrom feature import feat_eng\nfrom load_data imp"
},
{
"path": "rdagent/components/coder/data_science/model/exp.py",
"chars": 614,
"preview": "from typing import Dict, Optional\n\nfrom rdagent.components.coder.CoSTEER.task import CoSTEERTask\n\n\n# Because we use isin"
},
{
"path": "rdagent/components/coder/data_science/model/prompts.yaml",
"chars": 8149,
"preview": "model_coder:\n system: |-\n You are a world-class data scientist and machine learning engineer with deep expertise in "
},
{
"path": "rdagent/components/coder/data_science/model/test.py",
"chars": 2499,
"preview": "\"\"\"\nGenerate dataset to test the model workflow output\n\"\"\"\n\nfrom pathlib import Path\n\nfrom rdagent.components.coder.CoST"
},
{
"path": "rdagent/components/coder/data_science/pipeline/__init__.py",
"chars": 6167,
"preview": "\"\"\"\n\nLoop should not large change exclude\n- Action Choice[current data loader & spec]\n- other should share\n - Propose"
},
{
"path": "rdagent/components/coder/data_science/pipeline/eval.py",
"chars": 17545,
"preview": "# tess successfully running.\n# (GPT) if it aligns with the spec & rationality of the spec.\nimport json\nimport re\nfrom da"
},
{
"path": "rdagent/components/coder/data_science/pipeline/eval_tests/submission_format_test.txt",
"chars": 3819,
"preview": "import hashlib\nfrom pathlib import Path\nimport pandas as pd\n\n\ndef calculate_md5(file_path):\n with open(file_path, \"rb"
},
{
"path": "rdagent/components/coder/data_science/pipeline/exp.py",
"chars": 439,
"preview": "from rdagent.components.coder.CoSTEER.task import CoSTEERTask\n\n\n# Because we use isinstance to distinguish between diffe"
},
{
"path": "rdagent/components/coder/data_science/pipeline/prompts.yaml",
"chars": 23903,
"preview": "pipeline_coder:\n system: |-\n You are a grandmaster-level data scientist and machine learning engineer with deep expe"
},
{
"path": "rdagent/components/coder/data_science/raw_data_loader/README.md",
"chars": 275,
"preview": "# CoSTEER\n\n- subworkspace使用主experiment_workspace `RD-Agent/rdagent/scenarios/data_science/experiment/experiment.py`\n\n## "
},
{
"path": "rdagent/components/coder/data_science/raw_data_loader/__init__.py",
"chars": 9766,
"preview": "\"\"\"\n\nLoop should not large change exclude\n- Action Choice[current data loader & spec]\n- other should share\n - Propose"
},
{
"path": "rdagent/components/coder/data_science/raw_data_loader/conf.py",
"chars": 0,
"preview": ""
},
{
"path": "rdagent/components/coder/data_science/raw_data_loader/eval.py",
"chars": 4106,
"preview": "# tess successfully running.\n# (GPT) if it aligns with the spec & rationality of the spec.\nimport json\nimport re\nfrom pa"
},
{
"path": "rdagent/components/coder/data_science/raw_data_loader/eval_tests/data_loader_test.txt",
"chars": 2677,
"preview": "\"\"\"\nTests for `load_data` in load_data.py\n\"\"\"\n\nimport pickle\n\nimport pandas as pd\nfrom load_data import load_data\n\nimpor"
},
{
"path": "rdagent/components/coder/data_science/raw_data_loader/exp.py",
"chars": 250,
"preview": "from rdagent.components.coder.CoSTEER.task import CoSTEERTask\n\n\n# Because we use isinstance to distinguish between diffe"
},
{
"path": "rdagent/components/coder/data_science/raw_data_loader/prompts.yaml",
"chars": 20957,
"preview": "\nspec:\n system: |-\n You are a world-class data scientist and machine learning engineer with deep expertise in statis"
},
{
"path": "rdagent/components/coder/data_science/raw_data_loader/test.py",
"chars": 995,
"preview": "\"\"\"\nHelper functions for testing the raw_data_loader coder(CoSTEER-based) component.\n- Does the developer loop work corr"
},
{
"path": "rdagent/components/coder/data_science/share/doc.py",
"chars": 1267,
"preview": "\"\"\"\nDevelopers concentrating on writing documents for a workspace\n\"\"\"\n\nfrom rdagent.core.developer import Developer\nfrom"
},
{
"path": "rdagent/components/coder/data_science/share/ds_costeer.py",
"chars": 345,
"preview": "from rdagent.components.coder.CoSTEER import CoSTEER\n\n\nclass DSCoSTEER(CoSTEER):\n def get_develop_max_seconds(self) -"
},
{
"path": "rdagent/components/coder/data_science/share/eval.py",
"chars": 8236,
"preview": "import re\nfrom pathlib import Path\nfrom typing import Literal\n\nimport pandas as pd\n\nfrom rdagent.app.data_science.conf i"
},
{
"path": "rdagent/components/coder/data_science/share/notebook.py",
"chars": 5401,
"preview": "\"\"\"\nHandles conversion from a Python file to a Jupyter notebook.\n\"\"\"\n\nimport argparse\nfrom typing import Optional\n\nimpor"
},
{
"path": "rdagent/components/coder/data_science/share/prompts.yaml",
"chars": 6150,
"preview": "dump_model_coder:\n guideline: |-\n Your code will be executed in a inference mode with following command: \n ```bas"
},
{
"path": "rdagent/components/coder/data_science/share/util.py",
"chars": 13851,
"preview": "import ast\nimport io\nimport re\nimport tokenize\nfrom itertools import zip_longest\nfrom typing import List, Optional, Set,"
},
{
"path": "rdagent/components/coder/data_science/utils.py",
"chars": 275,
"preview": "import re\n\n\ndef remove_eda_part(stdout: str) -> str:\n \"\"\"Data Science scenario have a LLM-based EDA feature. We can r"
},
{
"path": "rdagent/components/coder/data_science/workflow/__init__.py",
"chars": 5355,
"preview": "from rdagent.app.data_science.conf import DS_RD_SETTING\nfrom rdagent.components.coder.CoSTEER.evaluators import (\n Co"
},
{
"path": "rdagent/components/coder/data_science/workflow/eval.py",
"chars": 7532,
"preview": "import json\nimport re\nfrom pathlib import Path\n\nimport pandas as pd\n\nfrom rdagent.app.data_science.conf import DS_RD_SET"
},
{
"path": "rdagent/components/coder/data_science/workflow/eval_tests/submission_format_test.txt",
"chars": 3253,
"preview": "from pathlib import Path\nimport pandas as pd\nimport hashlib\n\ndef calculate_md5(file_path):\n with open(file_path, \"rb\""
},
{
"path": "rdagent/components/coder/data_science/workflow/exp.py",
"chars": 517,
"preview": "import pickle\nimport site\nimport traceback\nfrom pathlib import Path\nfrom typing import Dict, Optional\n\nfrom rdagent.comp"
},
{
"path": "rdagent/components/coder/data_science/workflow/prompts.yaml",
"chars": 6675,
"preview": "workflow_coder:\n system: |-\n You are a world-class data scientist and machine learning engineer with deep expertise "
},
{
"path": "rdagent/components/coder/data_science/workflow/test.py",
"chars": 2289,
"preview": "\"\"\"\nGenerate dataset to test the workflow output\n\"\"\"\n\nfrom pathlib import Path\n\nfrom rdagent.components.coder.CoSTEER.co"
},
{
"path": "rdagent/components/coder/factor_coder/__init__.py",
"chars": 1297,
"preview": "from rdagent.components.coder.CoSTEER import CoSTEER\nfrom rdagent.components.coder.CoSTEER.evaluators import CoSTEERMult"
},
{
"path": "rdagent/components/coder/factor_coder/config.py",
"chars": 1675,
"preview": "import os\nfrom typing import Optional\n\nfrom pydantic_settings import SettingsConfigDict\n\nfrom rdagent.components.coder.C"
},
{
"path": "rdagent/components/coder/factor_coder/eva_utils.py",
"chars": 21733,
"preview": "import io\nimport json\nfrom abc import abstractmethod\nfrom typing import Dict, Tuple\n\nimport pandas as pd\n\nfrom rdagent.c"
},
{
"path": "rdagent/components/coder/factor_coder/evaluators.py",
"chars": 6221,
"preview": "import re\n\nfrom rdagent.components.coder.CoSTEER.evaluators import (\n CoSTEEREvaluator,\n CoSTEERMultiFeedback,\n "
},
{
"path": "rdagent/components/coder/factor_coder/evolving_strategy.py",
"chars": 8615,
"preview": "from __future__ import annotations\n\nimport json\nimport re\nfrom typing import Dict\n\nfrom rdagent.components.coder.CoSTEER"
},
{
"path": "rdagent/components/coder/factor_coder/factor.py",
"chars": 9527,
"preview": "from __future__ import annotations\n\nimport subprocess\nimport uuid\nfrom pathlib import Path\nfrom typing import Tuple, Uni"
},
{
"path": "rdagent/components/coder/factor_coder/factor_execution_template.txt",
"chars": 379,
"preview": "import os\n\nimport numpy as np\nimport pandas as pd\nfrom factor import feature_engineering_cls\n\nif os.path.exists(\"X_valid"
},
{
"path": "rdagent/components/coder/factor_coder/prompts.yaml",
"chars": 12502,
"preview": "\nevaluator_code_feedback_v1_system: |-\n User is trying to implement some factors in the following scenario:\n {{ scenar"
},
{
"path": "rdagent/components/coder/finetune/__init__.py",
"chars": 16543,
"preview": "\"\"\"\nLLM Fine-tuning CoSTEER Implementation\n\nThis module provides fine-tuning specific components for the CoSTEER framewo"
},
{
"path": "rdagent/components/coder/finetune/conf.py",
"chars": 14145,
"preview": "import json\nimport os\nimport re\nimport shutil\nfrom pathlib import Path\nfrom typing import Any, Literal\n\nfrom rdagent.app"
},
{
"path": "rdagent/components/coder/finetune/eval.py",
"chars": 16472,
"preview": "\"\"\"\nLLM Fine-tuning Evaluation Components\n\nProvides simplified evaluation: parameter filtering + micro-batch testing.\nNo"
}
]
// ... and 635 more files (download for full content)
About this extraction
This page contains the full source code of the microsoft/RD-Agent GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 835 files (4.5 MB), approximately 1.2M tokens, and a symbol index with 3037 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.