# TransOPT: Transfer Optimization System for Bayesian Optimization Using Transfer Learning

Docs | Tutorials | Examples | Paper | Citation
# Welcome to TransOPT!
**TransOPT** is an open-source software platform designed to facilitate the **design, benchmarking, and application of transfer learning for Bayesian optimization (TLBO)** algorithms through a modular, data-centric framework.
## Features
- **More than 1,000 benchmark problems covering a diverse range of domains**.
- **Build custom optimization algorithms as easily as stacking building blocks**.
- **Leverage historical data to achieve more efficient and informed optimization**.
- **Deploy experiments through an intuitive web UI and monitor results in real-time**.
TransOPT empowers researchers and developers to explore innovative optimization solutions effortlessly, bridging the gap between theory and practical application.
# [Installation: how to install TransOPT](https://maopl.github.io/TransOpt-doc/installation.html)
TransOPT is composed of two main components: the backend for data processing and business logic, and the frontend for user interaction. Each can be installed as follows:
### Prerequisites
Before installing TransOPT, make sure the following are installed:
- **Python 3.10+**
- **Node.js 17.9.1+ and npm 8.11.0+**: required to install and build the frontend. [Download Node.js](https://nodejs.org/en/download/)
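A quick way to confirm the prerequisites are available (an optional check; adjust the commands to your environment, e.g. `python3` instead of `python`):

```shell
python --version   # expect Python 3.10 or newer
node --version     # expect v17.9.1 or newer
npm --version      # expect 8.11.0 or newer
```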
1. Clone the repository:
```shell
$ git clone https://github.com/maopl/TransOpt.git
```
2. Install the required dependencies:
```shell
$ cd TransOpt
$ python setup.py install
```
3. Install the frontend dependencies:
```shell
$ cd webui && npm install
```
### Start the Backend Agent
To start the backend agent, use the following command:
```bash
$ python transopt/agent/app.py
```
### Web User Interface Mode
Once the backend has started successfully, launch the web UI from the `webui` directory on your local machine:
```bash
cd webui && npm start
```
This will open the TransOPT interface in your default web browser at `http://localhost:3000`.
### Command Line Mode
In addition to the web UI mode, TransOPT also offers a Command Line (CMD) mode for users who may not have access to a display screen, such as when working on a remote server.
To run TransOPT in CMD mode, use the following command:
```bash
python transopt/agent/run_cli.py -n Sphere -v 3 -o 1 -m RF -acf UCB -b 300
```
This command sets up a task named Sphere with 3 variables and 1 objective, using a random forest (RF) surrogate model and the upper confidence bound (UCB) acquisition function, with a budget of 300 function evaluations.
For a complete list of available options and more detailed usage instructions, please refer to the [CLI documentation](https://maopl.github.io/TransOpt-doc/usage/cli.html).
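For intuition, the loop that such a configuration drives can be sketched in plain Python. The snippet below is an illustrative stand-in, not TransOPT's implementation: it minimizes the Sphere function with a random-forest surrogate and a UCB-style acquisition rule (for minimization, the lower confidence bound `mean - kappa * std` is used).

```python
# Illustrative sketch of a BO loop with an RF surrogate and UCB-style
# acquisition. NOT TransOPT's implementation; uses scikit-learn only.
import numpy as np
from sklearn.ensemble import RandomForestRegressor

rng = np.random.default_rng(0)
dim, budget, kappa = 3, 30, 2.0

def sphere(x):
    return float(np.sum(x ** 2))

# Initial design: a few random points in [-5, 5]^dim.
X = rng.uniform(-5, 5, size=(5, dim))
y = np.array([sphere(x) for x in X])

while len(X) < budget:
    # Fit the surrogate on all evaluations so far.
    model = RandomForestRegressor(n_estimators=50, random_state=0).fit(X, y)
    # Score a random candidate pool with a lower confidence bound
    # (the minimization analogue of UCB): mean - kappa * std across trees.
    cand = rng.uniform(-5, 5, size=(512, dim))
    preds = np.stack([tree.predict(cand) for tree in model.estimators_])
    mu, sigma = preds.mean(axis=0), preds.std(axis=0)
    x_next = cand[np.argmin(mu - kappa * sigma)]
    # Evaluate the true objective and augment the data set.
    X = np.vstack([X, x_next])
    y = np.append(y, sphere(x_next))

print(f"best value after {budget} evaluations: {y.min():.4f}")
```

TransOPT assembles the same ingredients (surrogate model, acquisition function, budget) from the CLI flags shown above, but with pluggable, transfer-aware components.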
# [Documentation: The TransOPT Process](https://maopl.github.io/TransOpt-doc/)
Our docs walk you through using TransOPT, the web UI, and the key API entry points. For an overview of the system and the project-management workflow, see the [documentation](https://maopl.github.io/TransOpt-doc/).
# Why use TransOPT?
In recent years, Bayesian optimization (BO) has been widely used in fields such as hyperparameter optimization, molecular design, and synthetic biology. However, conventional BO is inefficient in that it conducts every optimization task from scratch, ignoring the experience gained from previous problem-solving practice. To address this challenge, transfer learning (TL) has been introduced to BO, aiming to leverage auxiliary data to improve optimization efficiency and performance. Despite this potential, the adoption of TLBO remains limited by the complexity of advanced TLBO methods. TransOPT is a system that facilitates:
- development of TLBO algorithms;
- benchmarking the performance of TLBO methods;
- applications of TLBO to downstream tasks.
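The core TLBO idea behind these points can be made concrete with a small, hedged sketch: warm-starting the surrogate of a new task with observations from a related, previously solved task. This is illustrative only (plain scikit-learn, with pooled data as the simplest possible transfer strategy); TransOPT's transfer modules are more elaborate.

```python
# Sketch of warm-starting a surrogate with auxiliary (source-task) data.
# Illustrative only; the task functions and sizes below are invented.
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF

rng = np.random.default_rng(1)

def source_task(x):  # a previously solved task
    return np.sum((x - 0.4) ** 2, axis=-1)

def target_task(x):  # a new, related task with a shifted optimum
    return np.sum((x - 0.5) ** 2, axis=-1)

# Plenty of historical (auxiliary) data from the source task ...
X_src = rng.uniform(0, 1, size=(40, 2))
y_src = source_task(X_src)
# ... but only a few expensive evaluations on the target task.
X_tgt = rng.uniform(0, 1, size=(3, 2))
y_tgt = target_task(X_tgt)

# Cold start: the surrogate sees only the scarce target data.
cold = GaussianProcessRegressor(kernel=RBF(0.3)).fit(X_tgt, y_tgt)
# Warm start: pool auxiliary and target data (the simplest transfer strategy).
warm = GaussianProcessRegressor(kernel=RBF(0.3)).fit(
    np.vstack([X_src, X_tgt]), np.concatenate([y_src, y_tgt])
)

# Compare where each surrogate would suggest evaluating next.
grid = rng.uniform(0, 1, size=(2000, 2))
print("cold-start suggestion:", grid[np.argmin(cold.predict(grid))])
print("warm-start suggestion:", grid[np.argmin(warm.predict(grid))])
```

With only three target observations, the cold-start surrogate has little to go on, while the warm-started one inherits the source task's landscape as a useful prior.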
**System overview.** **Upper-left:** illustrates the use of a web UI to construct new optimization algorithms by combining different components. **Upper-right:** highlights the application of an LLM agent to effectively manage optimization tasks. **Middle:** shows various visualization results derived from the optimization processes. **Lower:** presents a performance comparison of different TLBO methods.
# Reference & Citation
If you find our work helpful to your research, please consider citing our work:
```bibtex
@article{TransOPT,
title = {{TransOPT}: Transfer Optimization System for Bayesian Optimization Using Transfer Learning},
author = {Author Name and Collaborator Name},
url = {https://github.com/maopl/TransOPT},
year = {2024}
}
```
================================================
FILE: demo/analysis.py
================================================
import logging
import os
import argparse
from pathlib import Path
from transopt.ResultAnalysis.AnalysisPipeline import analysis_pipeline
def run_analysis(Exper_folder: Path, tasks, methods, seeds, args):
    logger = logging.getLogger(__name__)
    analysis_pipeline(Exper_folder, tasks=tasks, methods=methods, seeds=seeds, args=args)


if __name__ == '__main__':
    tasks = {
        # 'cp': {'budget': 8, 'time_stamp': 2, 'params': {'input_dim': 2}},
        'Ackley': {'budget': 11, 'time_stamp': 3, 'params': {'input_dim': 1}},
        # 'MPB': {'budget': 110, 'time_stamp': 3},
        # 'Griewank': {'budget': 11, 'time_stamp': 3, 'params': {'input_dim': 1}},
        # 'DixonPrice': {'budget': 110, 'time_stamp': 3},
        # 'Lunar': {'budget': 110, 'time_stamp': 3},
        # 'XGB': {'budget': 110, 'time_stamp': 3},
    }
    Methods_list = {'MTBO', 'BO'}
    Seeds = [1, 2, 3, 4, 5]

    parser = argparse.ArgumentParser(description='Run the result-analysis pipeline.')
    parser.add_argument("-in", "--init_number", type=int, default=0)
    parser.add_argument("-p", "--exp_path", type=str, default='../LFL_experiments')
    # Experiment name; results are saved under the experiments directory.
    parser.add_argument("-n", "--exp_name", type=str, default='test')
    # NOTE: argparse's `type=bool` treats every non-empty string as True,
    # so these flags effectively keep their defaults unless passed "".
    parser.add_argument("-c", "--comparision", type=bool, default=True)
    parser.add_argument("-a", "--track", type=bool, default=True)
    parser.add_argument("-r", "--report", type=bool, default=False)
    args = parser.parse_args()

    Exper_folder = Path(args.exp_path) / args.exp_name
    run_analysis(Exper_folder, tasks=tasks, methods=Methods_list, seeds=Seeds, args=args)
================================================
FILE: demo/causal_analysis.py
================================================
import logging
import os
import argparse
from pathlib import Path
from transopt.ResultAnalysis.AnalysisPipeline import analysis_pipeline
def run_analysis(Exper_folder: Path, tasks, methods, seeds, args):
    logger = logging.getLogger(__name__)
    analysis_pipeline(Exper_folder, tasks=tasks, methods=methods, seeds=seeds, args=args)


if __name__ == '__main__':
    # NOTE: `samples_num`, `split_index`, `CompilerBenchmarkBase`, and
    # `split_into_segments` were referenced here without being defined or
    # imported; the definitions below are illustrative placeholders.
    from csstuning.compiler.compiler_benchmark import CompilerBenchmarkBase

    samples_num = 110  # placeholder evaluation budget
    split_index = 0    # placeholder segment index

    def split_into_segments(lst, n):
        lst = list(lst)
        k, m = divmod(len(lst), n)
        return [lst[i * k + min(i, m):(i + 1) * k + min(i + 1, m)] for i in range(n)]

    available_workloads = CompilerBenchmarkBase.AVAILABLE_WORKLOADS
    split_workloads = split_into_segments(available_workloads, 10)
    if split_index >= len(split_workloads):
        raise IndexError("split index out of range")
    workloads = split_workloads[split_index]

    tasks = {
        "GCC": {"budget": samples_num, "workloads": workloads},
        "LLVM": {"budget": samples_num, "workloads": workloads},
    }
    Methods_list = {'MTBO', 'BO'}
    Seeds = [1, 2, 3, 4, 5]

    parser = argparse.ArgumentParser(description='Run the result-analysis pipeline.')
    parser.add_argument("-in", "--init_number", type=int, default=0)
    parser.add_argument("-p", "--exp_path", type=str, default='../LFL_experiments')
    # Experiment name; results are saved under the experiments directory.
    parser.add_argument("-n", "--exp_name", type=str, default='test')
    parser.add_argument("-c", "--comparision", type=bool, default=True)
    parser.add_argument("-a", "--track", type=bool, default=True)
    parser.add_argument("-r", "--report", type=bool, default=False)
    args = parser.parse_args()

    Exper_folder = Path(args.exp_path) / args.exp_name
    run_analysis(Exper_folder, tasks=tasks, methods=Methods_list, seeds=Seeds, args=args)
================================================
FILE: demo/comparison/analysis_hypervolume.py
================================================
import sys
from pathlib import Path
current_path = Path(__file__).resolve().parent
package_path = current_path.parent.parent
sys.path.insert(0, str(package_path))
import json
import numpy as np
import pandas as pd
import scipy.stats
from transopt.utils.pareto import calc_hypervolume, find_pareto_front
from transopt.utils.plot import plot3D
target = "gcc"
results_path = package_path / "experiment_results"
gcc_results = results_path / "gcc_archive_new"
llvm_results = results_path / "llvm_archive"
algorithm_list = ["ParEGO", "SMSEGO", "MoeadEGO", "CauMO"]
objectives = ["execution_time", "file_size", "compilation_time"]
seed_list = [65535, 65536, 65537, 65538, 65539]
def load_and_prepare_data(file_path, objectives):
"""
Loads JSON data and prepares a DataFrame.
"""
# print(f"Loading data from {file_path}")
with open(file_path, "r") as f:
data = json.load(f)
data = data.get("1", {})
input_vectors = data["input_vector"]
output_vectors = data["output_value"]
df_input = pd.DataFrame(input_vectors)
df_output = pd.DataFrame(output_vectors)[objectives]
df_combined = pd.concat([df_input, df_output], axis=1)
# print(f"Loaded {len(df_combined)} data points")
df_combined = df_combined.drop_duplicates(subset=df_input.columns.tolist())
for obj in objectives:
df_combined = df_combined[df_combined[obj] != 1e10]
# print(f"Loaded {len(df_combined)} data points, removed {len(df_input) - len(df_combined)} duplicates")
# print()
return df_combined
def load_data(workload, algorithm, seed):
if target == "llvm":
result_file = llvm_results / f"llvm_{workload}" / algorithm / f"{seed}_KB.json"
else:
result_file = gcc_results / f"gcc_{workload}" / algorithm / f"{seed}_KB.json"
df = load_and_prepare_data(result_file, objectives)
return df
def collect_all_data(workload):
all_data = []
for algorithm in algorithm_list:
for seed in seed_list:
df = load_data(workload, algorithm, seed)
all_data.append(df[objectives].values)
all_data = np.vstack(all_data)
global_mean = all_data.mean(axis=0)
global_std = all_data.std(axis=0)
return all_data, global_mean, global_std
def calculate_mean_hypervolume(
algorithm, workload, global_stats1, global_stats2, normalization_type="min-max"
):
"""
Calculate mean hypervolume for a given algorithm across all seeds.
Parameters:
global_stats1: Global mean or min of all objectives (depending on normalization_type)
global_stats2: Global std or max of all objectives (depending on normalization_type)
normalization_type: 'min-max' or 'mean' for different types of normalization
"""
hypervolume_list = []
for seed in seed_list:
df = load_data(workload, algorithm, seed)
if normalization_type == "mean":
# Apply mean normalization
normalized_df = (df[objectives] - global_stats1) / global_stats2
elif normalization_type == "min-max":
# Apply min-max normalization
normalized_df = (df[objectives] - global_stats1) / (
global_stats2 - global_stats1
)
else:
raise ValueError(
"Unsupported normalization type. Choose 'mean' or 'min-max'."
)
pareto_front = find_pareto_front(normalized_df.values)
hypervolume = calc_hypervolume(pareto_front, np.ones(len(objectives)))
# print(f"{algorithm} {seed} {hypervolume}")
hypervolume_list.append(hypervolume)
return np.mean(hypervolume_list)
def calculate_hypervolumes(
algorithm, workload, global_stats1, global_stats2, normalization_type="min-max"
):
"""
Calculate hypervolumes for a given algorithm across all seeds.
Parameters:
global_stats1: Global mean or min of all objectives (depending on normalization_type)
global_stats2: Global std or max of all objectives (depending on normalization_type)
normalization_type: 'min-max' or 'mean' for different types of normalization
"""
hypervolume_list = []
for seed in seed_list:
df = load_data(workload, algorithm, seed)
if normalization_type == "mean":
normalized_df = (df[objectives] - global_stats1) / global_stats2
elif normalization_type == "min-max":
normalized_df = (df[objectives] - global_stats1) / (global_stats2 - global_stats1)
else:
raise ValueError("Unsupported normalization type. Choose 'mean' or 'min-max'.")
pareto_front = find_pareto_front(normalized_df.values)
hypervolume = calc_hypervolume(pareto_front, np.ones(len(objectives)))
hypervolume_list.append(hypervolume)
return hypervolume_list
def analyze_and_compare_algorithms(workload_results):
analysis_results = {}
for workload, algorithms in workload_results.items():
workload_analysis = {
'means': {},
'std_devs': {},
'significance': {}
}
        # Compute each algorithm's mean hypervolume and standard deviation, and find the best algorithm
best_algorithm = None
best_mean_hv = -float('inf')
for algorithm, hypervolumes in algorithms.items():
mean_hv = np.mean(hypervolumes)
workload_analysis['means'][algorithm] = mean_hv
workload_analysis['std_devs'][algorithm] = np.std(hypervolumes)
if mean_hv > best_mean_hv:
best_mean_hv = mean_hv
best_algorithm = algorithm
        # Run a significance test for each algorithm against the best algorithm only
for algorithm, hypervolumes in algorithms.items():
if algorithm != best_algorithm:
stat, p_value = scipy.stats.mannwhitneyu(algorithms[best_algorithm], hypervolumes)
comparison_key = f"{algorithm} vs {best_algorithm}"
workload_analysis['significance'][comparison_key] = ('+' if p_value < 0.05 else '-')
        # # Pairwise significance tests between all algorithms
# algorithm_names = list(algorithms.keys())
# for i in range(len(algorithm_names)):
# for j in range(i+1, len(algorithm_names)):
# hypervolumes1 = algorithms[algorithm_names[i]]
# hypervolumes2 = algorithms[algorithm_names[j]]
# stat, p_value = scipy.stats.mannwhitneyu(hypervolumes1, hypervolumes2)
# comparison_key = f"{algorithm_names[i]} vs {algorithm_names[j]}"
# workload_analysis['significance'][comparison_key] = ('+' if p_value < 0.05 else '-')
analysis_results[workload] = workload_analysis
return analysis_results
def matrix_to_latex(analysis_results, caption):
latex_code = []
    # Add document class and packages
latex_code.extend([
"\\documentclass{article}",
"\\usepackage{geometry}",
"\\geometry{a4paper, margin=1in}",
"\\usepackage{graphicx}",
"\\usepackage{colortbl}",
"\\usepackage{booktabs}",
"\\usepackage{threeparttable}",
"\\usepackage{caption}",
"\\usepackage{xcolor}",
"\\pagestyle{empty}",
"\\begin{document}",
"\\begin{table*}[t!]",
" \\scriptsize",
" \\centering",
f" \\caption{{{caption}}}",
" \\resizebox{1.0\\textwidth}{!}{",
" \\begin{tabular}{c|" + "".join(["c"] * len(analysis_results)) + "}",
" \\hline"
])
    # Determine the list of algorithms
algorithms = list(analysis_results[next(iter(analysis_results))]['means'].keys())
    # Add column headers (one column per algorithm)
col_header = " & ".join([""] + [f"\\texttt{{{algorithm}}}" for algorithm in algorithms]) + " \\\\"
latex_code.append(" " + col_header)
latex_code.append(" \\hline")
    # Add one row per workload
for workload in analysis_results.keys():
row_data = [workload]
best_algorithm = max(analysis_results[workload]['means'], key=analysis_results[workload]['means'].get)
for algorithm in analysis_results[workload]['means'].keys():
mean = analysis_results[workload]['means'][algorithm]
std_dev = analysis_results[workload]['std_devs'][algorithm]
significance_mark = ""
if algorithm != best_algorithm:
for other_algorithm, sig_value in analysis_results[workload]['significance'].items():
if algorithm in other_algorithm and sig_value == '+':
significance_mark = "$^\\dagger$"
break
if algorithm == best_algorithm:
row_data.append(f"\\cellcolor[rgb]{{.682, .667, .667}}\\textbf{{{mean:.3f} (±{std_dev:.3f})}}{significance_mark}")
else:
row_data.append(f"{mean:.3f} (±{std_dev:.3f}){significance_mark}")
latex_code.append(" " + " & ".join(row_data) + " \\\\")
    # Add the table notes
latex_code.extend([
" \\hline",
" \\end{tabular}",
" }",
" \\begin{tablenotes}",
" \\tiny",
" \\item $^\\dagger$ indicates that the best algorithm is significantly better than the other one according to the Wilcoxon signed-rank test at a 5\\% significance level."
" \\end{tablenotes}",
"\\end{table*}%",
"\\end{document}"
])
# latex_code.append(" " + " & ".join(row_data)
    # # Add column headers
# col_header = " & ".join([""] + list(analysis_results.keys())) + " \\\\"
# latex_code.append(" " + col_header)
# latex_code.append(" \\hline")
    # # Add rows
# for algorithm in analysis_results[next(iter(analysis_results))]['means'].keys():
# row_data = [f"\\texttt{{{algorithm}}}"]
# for workload, results in analysis_results.items():
# mean = results['means'][algorithm]
# std_dev = results['std_devs'][algorithm]
# significance_mark = ""
# for other_algorithm, sig_value in results['significance'].items():
# if algorithm in other_algorithm and sig_value == '+':
# significance_mark = "$^\\dagger$"
# break
# row_data.append(f"{mean:.3f} (±{std_dev:.3f}){significance_mark}")
# latex_code.append(" " + " & ".join(row_data) + " \\\\")
return "\n".join(latex_code)
def load_workloads():
file_path = package_path / "demo" / "comparison" / f"features_by_workload_{target}.json"
with open(file_path, "r") as f:
return json.load(f).keys()
if __name__ == "__main__":
workloads = load_workloads()
workloads = list(workloads)
workloads.sort()
workloads = workloads[:14]
# workloads = [
# "cbench-automotive-qsort1",
# "cbench-automotive-susan-e",
# "cbench-network-patricia",
# "cbench-automotive-bitcount",
# "cbench-bzip2",
# "cbench-telecom-adpcm-d",
# "cbench-office-stringsearch2",
# "cbench-security-rijndael",
# "cbench-security-sha",
# ]
workload_results = {}
for workload in workloads:
print(f"Processing workload: {workload}")
all_data, global_mean, global_std = collect_all_data(workload)
global_max = all_data.max(axis=0)
global_min = all_data.min(axis=0)
algorithm_results = {}
for algorithm in algorithm_list:
hypervolumes = calculate_hypervolumes(
algorithm,
workload,
global_min,
global_max,
normalization_type="min-max",
)
algorithm_results[algorithm] = hypervolumes
        # Strip the 7-character dataset prefix (e.g. "cbench-") from the workload name
workload_short_name = workload[7:]
workload_results[workload_short_name] = algorithm_results
final_results = analyze_and_compare_algorithms(workload_results)
print(final_results)
caption = "Perfomance Comparison of Algorithms"
latex_table = matrix_to_latex(final_results, caption)
latex_table_path = "latex_table.tex"
with open(latex_table_path, 'w') as file:
file.write(latex_table)
# for workload in workloads:
# print(workload)
# all_data, global_mean, global_std = collect_all_data(workload)
# global_max = all_data.max(axis=0)
# global_min = all_data.min(axis=0)
# hv_list = []
# for algorithm in algorithm_list:
# mean_hypervolume = calculate_mean_hypervolume(
# algorithm,
# workload,
# global_min,
# global_max,
# normalization_type="min-max",
# )
# hv_list.append((algorithm, mean_hypervolume))
# # Sort by hypervolume
# hv_list.sort(key=lambda x: x[1], reverse=True)
# print(hv_list)
# print()
================================================
FILE: demo/comparison/analysis_plot.py
================================================
import sys
from pathlib import Path
current_path = Path(__file__).resolve().parent
package_path = current_path.parent.parent
sys.path.insert(0, str(package_path))
import json
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go  # required by the dynamic_plot_html / plot_pareto_front_html helpers below
from matplotlib.animation import FuncAnimation
from mpl_toolkits.mplot3d import Axes3D
from transopt.utils.pareto import calc_hypervolume, find_pareto_front
from transopt.utils.plot import plot3D
target = "gcc"
results_path = package_path / "experiment_results"
gcc_results_path = results_path / "gcc_comparsion"
gcc_samples_path = results_path / "gcc_samples"
llvm_results = results_path / "llvm_comparsion"
llvm_samples_path = results_path / "llvm_samples"
dbms_samples_path = results_path / "dbms_samples"
algorithm_list = ["ParEGO", "SMSEGO", "MoeadEGO", "CauMO"]
# algorithm_list = ["SMSEGO"]
# objectives = ["execution_time", "file_size", "compilation_time"]
objectives = ["latency", "throughput"]
seed_list = [65535, 65536, 65537, 65538, 65539]
def load_and_prepare_data(file_path):
"""
Loads JSON data and prepares a DataFrame.
"""
# print(f"Loading data from {file_path}")
with open(file_path, "r") as f:
data = json.load(f)
if "1" in data:
data = data["1"]
input_vectors = data["input_vector"]
output_vectors = data["output_value"]
df_input = pd.DataFrame(input_vectors)
df_output = pd.DataFrame(output_vectors)[objectives]
df_combined = pd.concat([df_input, df_output], axis=1)
print(f"Loaded {len(df_combined)} data points")
df_combined = df_combined.drop_duplicates(subset=df_input.columns.tolist())
for obj in objectives:
df_combined = df_combined[df_combined[obj] != 1e10]
print(f"Loaded {len(df_combined)} data points, removed {len(df_input) - len(df_combined)} duplicates")
print()
return df_combined
def load_data(workload, algorithm, seed):
if target == "llvm":
result_file = llvm_results / f"llvm_{workload}" / algorithm / f"{seed}_KB.json"
else:
result_file = gcc_results_path / f"gcc_{workload}" / algorithm / f"{seed}_KB.json"
df = load_and_prepare_data(result_file)
return df
def collect_all_data(workload):
all_data = []
for algorithm in algorithm_list:
for seed in seed_list:
df = load_data(workload, algorithm, seed)
all_data.append(df[objectives].values)
all_data = np.vstack(all_data)
global_mean = all_data.mean(axis=0)
global_std = all_data.std(axis=0)
return all_data, global_mean, global_std
def dynamic_plot(workload, algorithm, seed):
"""
Dynamically plot the three objectives for a given workload and algorithm for a specific seed.
"""
# Collect all data to understand the range
all_data, global_mean, global_std = collect_all_data(workload)
global_min = np.min(all_data, axis=0)
global_max = np.max(all_data, axis=0)
# Load data for the specific seed
df = load_data(workload, algorithm, seed)
# Normalize data (Min-Max normalization)
df_normalized = (df[objectives] - global_min) / (global_max - global_min)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.set_title(f"Dynamic Plot for {workload} - {algorithm} - Seed {seed}")
ax.set_xlabel(objectives[0])
ax.set_ylabel(objectives[1])
ax.set_zlabel(objectives[2])
# Initialize two scatter plots: one for all previous points, one for the new point
previous_points = ax.scatter([], [], [], c='b', marker='o') # all previous points in blue
current_point = ax.scatter([], [], [], c='r', marker='o') # current point in red
def init():
previous_points._offsets3d = ([], [], [])
current_point._offsets3d = ([], [], [])
return previous_points, current_point
def update(frame):
# Add all previous points up to the current frame
previous_points._offsets3d = (df_normalized.iloc[:frame][objectives[0]].values,
df_normalized.iloc[:frame][objectives[1]].values,
df_normalized.iloc[:frame][objectives[2]].values)
# Add the current point (latest one in the sequence)
current_point._offsets3d = (df_normalized.iloc[frame:frame+1][objectives[0]].values,
df_normalized.iloc[frame:frame+1][objectives[1]].values,
df_normalized.iloc[frame:frame+1][objectives[2]].values)
return previous_points, current_point
frames = len(df)
ani = FuncAnimation(fig, update, frames=frames, blit=False, repeat=False)
# Save the plot to a file
gif_path = package_path / "demo" / "comparison" / "gifs" / f"{target}_{algorithm}_{workload}_{seed}.gif"
ani.save(gif_path, writer='imagemagick')
plt.close(fig) # Close the plot to free memory
def dynamic_plot_html(workload, algorithm, seed):
"""
Dynamically plot the three objectives for a given workload and algorithm for a specific seed using Plotly.
"""
# Collect all data to understand the range
all_data, global_mean, global_std = collect_all_data(workload)
global_min = np.min(all_data, axis=0)
global_max = np.max(all_data, axis=0)
# Load data for the specific seed
df = load_data(workload, algorithm, seed)
# Normalize data (Min-Max normalization)
df_normalized = (df[objectives] - global_min) / (global_max - global_min)
pareto_front, pareto_front_index = find_pareto_front(df_normalized.values, return_index=True)
df_normalized = df_normalized.iloc[pareto_front_index]
# Create traces for previous and current points
trace1 = go.Scatter3d(x=[], y=[], z=[], mode='markers', marker=dict(size=5, color='blue'))
trace2 = go.Scatter3d(x=[], y=[], z=[], mode='markers', marker=dict(size=5, color='red'))
# Combine traces into a data list
data = [trace1, trace2]
# Create the layout of the plot
layout = go.Layout(
title=f"Dynamic Plot for {workload} - {algorithm} - Seed {seed}",
scene=dict(
xaxis=dict(title=objectives[0], range=[0, 1]),
yaxis=dict(title=objectives[1], range=[0, 1]),
zaxis=dict(title=objectives[2], range=[0, 1])
)
)
# Create the figure
fig = go.Figure(data=data, layout=layout)
# Create frames for the animation
frames = []
for t in range(len(df)):
frame = go.Frame(
data=[
go.Scatter3d(
x=df_normalized.iloc[:t+1][objectives[0]].values,
y=df_normalized.iloc[:t+1][objectives[1]].values,
z=df_normalized.iloc[:t+1][objectives[2]].values,
mode='markers',
marker=dict(size=5, color='blue')
),
go.Scatter3d(
x=df_normalized.iloc[t:t+1][objectives[0]].values,
y=df_normalized.iloc[t:t+1][objectives[1]].values,
z=df_normalized.iloc[t:t+1][objectives[2]].values,
mode='markers',
marker=dict(size=5, color='red')
)
]
)
frames.append(frame)
fig.frames = frames
prev_frame_button = dict(
args=[None, {"frame": {"duration": 0, "redraw": False}, "mode": "immediate", "transition": {"duration": 0}}],
label='Previous',
method='animate'
)
next_frame_button = dict(
args=[None, {"frame": {"duration": 0, "redraw": False}, "mode": "immediate", "transition": {"duration": 0}}],
label='Next',
method='animate'
)
fig.update_layout(
updatemenus=[dict(
type='buttons',
showactive=False,
y=0,
x=1.05,
xanchor='right',
yanchor='top',
pad=dict(t=0, r=10),
buttons=[prev_frame_button, next_frame_button]
)]
)
# fig.update_layout(sliders=sliders)
# Save the plot to HTML file
html_path = package_path / "demo" / "comparison" / "htmls" / f"dynamic_{target}_{algorithm}_{workload}_{seed}.html"
fig.write_html(str(html_path))
def save_individual_frames(workload, algorithm, seed):
"""
Save each frame of the three objectives as a separate plot for a given workload, algorithm, and seed.
"""
# Load data for the specific seed
df = load_data(workload, algorithm, seed)
# Ensure the directory for saving frames exists
frames_dir = package_path / "demo" / "comparison" / "frames" / f"{algorithm}_{workload}_{seed}"
os.makedirs(frames_dir, exist_ok=True)
for idx in range(len(df)):
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
# Add data points from the DataFrame row by row
x, y, z = df.iloc[idx][objectives[0]], df.iloc[idx][objectives[1]], df.iloc[idx][objectives[2]]
# Plot and customize as needed
ax.scatter(x, y, z, color='r')
ax.set_title(f"Frame {idx} for {workload} - {algorithm} - Seed {seed}")
ax.set_xlabel(objectives[0])
ax.set_ylabel(objectives[1])
ax.set_zlabel(objectives[2])
# Save the plot as a file
frame_file = frames_dir / f"frame_{idx:04d}.png"
plt.savefig(frame_file)
plt.close(fig) # Close the plot to free memory
def load_workloads():
file_path = package_path / "demo" / "comparison" / f"features_by_workload_{target}.json"
with open(file_path, "r") as f:
return json.load(f).keys()
def plot_pareto_front_html(workload):
# df = load_and_prepare_data(gcc_samples_path / f"GCC_{workload}.json")
df = load_and_prepare_data(llvm_samples_path / f"LLVM_{workload}.json")
df_normalized = (df - df.min()) / (df.max() - df.min())
_, pareto_indices = find_pareto_front(df_normalized[objectives].values, return_index=True)
# Retrieve Pareto points
pareto_points = df_normalized.iloc[pareto_indices][objectives]
# Create a 3D scatter plot using plotly
fig = go.Figure(data=[go.Scatter3d(
x=pareto_points[objectives[0]],
y=pareto_points[objectives[1]],
z=pareto_points[objectives[2]],
mode='markers',
marker=dict(
size=5,
color='blue', # set color to blue
opacity=0.8
)
)])
# Update the layout
fig.update_layout(
title=f"Pareto Front for {workload}",
scene=dict(
xaxis_title=objectives[0],
yaxis_title=objectives[1],
zaxis_title=objectives[2]
)
)
# Define the path for HTML file
html_path = package_path / "demo" / "comparison" / "htmls"
# Ensure the directory exists
html_path.mkdir(parents=True, exist_ok=True)
# Save the plot as an HTML file
fig.write_html(str(html_path / f"{target}_pareto_front_{workload}.html"))
def plot_pareto_front(workload):
# df = load_and_prepare_data(gcc_samples_path / f"GCC_{workload}.json")
# df = load_and_prepare_data(llvm_samples_path / f"LLVM_{workload}.json")
df = load_data(workload, "ParEGO", 65535)
df_normalized = (df - df.min()) / (df.max() - df.min())
_, pareto_indices = find_pareto_front(df_normalized[objectives].values, return_index=True)
# Retrieve Pareto points
points = df_normalized.iloc[pareto_indices][objectives]
# Create a 3D scatter plot
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.set_title(f"Pareto Front for {workload}")
ax.set_xlabel(objectives[0])
ax.set_ylabel(objectives[1])
ax.set_zlabel(objectives[2])
# # Scatter plot for Pareto front
# points = df_normalized[objectives]
# Convert Series to NumPy array before plotting
x_values = points[objectives[0]].values
y_values = points[objectives[1]].values
z_values = points[objectives[2]].values
ax.scatter(x_values, y_values, z_values, c='b', marker='o')
# Save the plot as a file
file_path = package_path / "demo" / "comparison" / "pngs" / f"{target}_pf_{workload}.png"
plt.savefig(file_path)
plt.close(fig) # Close the plot to free memory
def plot_all(workload, algorithm=""):
# df = load_and_prepare_data(llvm_samples_path / f"LLVM_{workload}.json")
# df = load_and_prepare_data(gcc_samples_path / f"GCC_{workload}.json")
# df = load_data(workload, algorithm, 65535)
df = load_and_prepare_data(dbms_samples_path / f"DBMS_{workload}.json")
df_normalized = (df - df.min()) / (df.max() - df.min())
    df_normalized = df  # NOTE: this overrides the min-max normalization above; raw values are plotted
# Create a 3D scatter plot
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.set_title(f"All samples for {workload}")
ax.set_xlabel(objectives[0])
ax.set_ylabel(objectives[1])
ax.set_zlabel(objectives[2])
# Scatter plot for Pareto front
points = df_normalized[objectives]
# Convert Series to NumPy array before plotting
x_values = points[objectives[0]].values
y_values = points[objectives[1]].values
z_values = points[objectives[2]].values
ax.scatter(x_values, y_values, z_values, c='b', marker='o')
# Save the plot as a file
file_path = package_path / "demo" / "comparison" / "pngs" / f"{target}_{workload}.png"
plt.savefig(file_path)
plt.close(fig) # Close the plot to free memory
# 2D plot all
def plot_all_2d(workload, algorithm=""):
# df = load_and_prepare_data(llvm_samples_path / f"LLVM_{workload}.json")
# df = load_and_prepare_data(gcc_samples_path / f"GCC_{workload}.json")
# df = load_data(workload, algorithm, 65535)
df = load_and_prepare_data(dbms_samples_path / f"DBMS_{workload}.json")
df_normalized = (df - df.min()) / (df.max() - df.min())
    df_normalized = df  # NOTE: this overrides the min-max normalization above; raw values are plotted
# Create a 2D scatter plot
fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_title(f"All samples for {workload}")
ax.set_xlabel(objectives[0])
ax.set_ylabel(objectives[1])
# Scatter plot for Pareto front
points = df_normalized[objectives]
# Convert Series to NumPy array before plotting
x_values = points[objectives[0]].values
y_values = points[objectives[1]].values
ax.scatter(x_values, y_values, c='b', marker='o')
# Save the plot as a file
file_path = package_path / "demo" / "comparison" / "pngs" / f"{target}_{workload}.png"
plt.savefig(file_path)
plt.close(fig) # Close the plot to free memory
if __name__ == "__main__":
# workloads = load_workloads()
# workloads = [
# "cbench-consumer-tiff2bw",
# "cbench-security-rijndael",
# "cbench-security-pgp",
# "polybench-cholesky",
# "cbench-consumer-tiff2rgba",
# "cbench-network-patricia",
# # "cbench-automotive-susan-e",
# # "polybench-symm",
# "cbench-consumer-mad",
# "polybench-lu"
# ]
# workloads = [
# "cbench-security-sha",
# "cbench-telecom-adpcm-c",
# ""
# ]
# LLVM
workloads_improved = [
"cbench-telecom-gsm",
"cbench-automotive-qsort1",
"cbench-automotive-susan-e",
"cbench-consumer-tiff2rgba",
"cbench-network-patricia",
"cbench-consumer-tiff2bw",
"cbench-consumer-jpeg-d",
"cbench-telecom-adpcm-c",
"cbench-security-rijndael",
"cbench-security-sha",
]
workloads_mysql = [
"sibench",
"smallbank",
"voter",
"tatp",
"tpcc",
"twitter",
]
seed = 65535 # Example seed
# Plot sampling results
for workload in workloads_mysql:
# for algorithm in algorithm_list:
plot_all_2d(workload)
# plot_pareto_front(workload)
# for algorithm in algorithm_list:
# # dynamic_plot_html("cbench-consumer-tiff2bw", algorithm, seed)
# for workload in workloads:
# dynamic_plot_html(workload, algorithm, seed)
# dynamic_plot(workload, algorithm, seed)
# save_individual_frames(workload, algorithm, objectives, seed)
================================================
FILE: demo/comparison/experiment_gcc.py
================================================
import sys
from pathlib import Path
current_dir = Path(__file__).resolve().parent
package_dir = current_dir.parent.parent
sys.path.insert(0, str(package_dir))
import argparse
import json
import os
import numpy as np
from csstuning.compiler.compiler_benchmark import CompilerBenchmarkBase
from transopt.benchmark import instantiate_problems
from transopt.KnowledgeBase.kb_builder import construct_knowledgebase
from transopt.KnowledgeBase.TransferDataHandler import OptTaskDataHandler
from optimizer.construct_optimizer import get_optimizer
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"
os.environ["OMP_NUM_THREADS"] = "1"
def execute_tasks(tasks, args):
kb = construct_knowledgebase(args)
testsuits = instantiate_problems(tasks, args.seed)
optimizer = get_optimizer(args)
data_handler = OptTaskDataHandler(kb, args)
optimizer.optimize(testsuits, data_handler)
def split_into_segments(lst, n):
lst = list(lst)
k, m = divmod(len(lst), n)
return [lst[i * k + min(i, m) : (i + 1) * k + min(i + 1, m)] for i in range(n)]
def get_workloads(workloads, split_index, total_splits=10):
segments = split_into_segments(workloads, total_splits)
if split_index >= len(segments):
raise IndexError("split index out of range")
return segments[split_index]
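For reference, `split_into_segments` splits a workload list as evenly as possible across shards: with `k, m = divmod(len(lst), n)`, the first `m` segments get `k + 1` items and the rest get `k`. A self-contained sketch (duplicating the function above) makes the behavior concrete:

```python
def split_into_segments(lst, n):
    # First m segments receive one extra element each.
    lst = list(lst)
    k, m = divmod(len(lst), n)
    return [lst[i * k + min(i, m):(i + 1) * k + min(i + 1, m)] for i in range(n)]

segments = split_into_segments(range(10), 3)
print([len(s) for s in segments])  # [4, 3, 3]
```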
def load_features():
file_path = package_dir / "demo" / "comparison" / "features_by_workload_gcc_extra.json"
with open(file_path, "r") as f:
return json.load(f)
def configure_experiment(workload, features, seed, optimizer_name, exp_path, budget=20, init_number=10):
exp_name = f"gcc_{workload}"
args = argparse.Namespace(
seed=seed,
optimizer=optimizer_name,
budget=budget,
init_number=init_number,
pop_size=init_number,
init_method="random",
exp_path=exp_path,
exp_name=exp_name,
verbose=True,
normalize="norm",
acquisition_func="LCB",
)
tasks = {
"GCC": {
"budget": budget,
"workloads": [workload],
"knobs": features[workload]["top"],
},
}
return tasks, args
def main(optimizers=(), repeat=5, budget=500, init_number=21):
features = load_features()
parser = argparse.ArgumentParser(description="Run optimization experiments")
parser.add_argument("--split_index", type=int, default=0,
help="Index for splitting the workload segments")
args = parser.parse_args()
available_workloads = [
"polybench-3mm",
"cbench-automotive-susan-c",
"cbench-consumer-tiff2dither",
"cbench-automotive-bitcount",
"polybench-2mm",
"polybench-adi",
"cbench-office-stringsearch2",
"polybench-fdtd-2d",
"polybench-atax",
"polybench-doitgen",
"polybench-durbin",
"polybench-fdtd-apml",
"polybench-gemver",
"polybench-gesummv",
]
# available_workloads = features.keys()
workloads = get_workloads(available_workloads, args.split_index)
exp_path = package_dir / "experiment_results"
for optimizer_name in optimizers:
for workload in workloads:
for i in range(repeat):
tasks, exp_args = configure_experiment(
workload,
features,
65535 + i,
optimizer_name,
exp_path,
budget,
init_number,
)
execute_tasks(tasks, exp_args)
def main_debug(repeat=1, budget=20, init_number=10):
features = load_features()
parser = argparse.ArgumentParser(description="Run optimization experiments")
parser.add_argument("--split_index", type=int, default=9,
help="Index for splitting the workload segments")
args = parser.parse_args()
# workloads = get_workloads(features.keys(), args.split_index)[:1]
workloads = ["cbench-consumer-jpeg-d"]
exp_path = package_dir / "experiment_results"
for optimizer_name in ["MoeadEGO"]:
for workload in workloads:
for i in range(repeat):
tasks, exp_args = configure_experiment(
workload,
features,
65535 + i,
optimizer_name,
exp_path,
budget,
init_number,
)
execute_tasks(tasks, exp_args)
if __name__ == "__main__":
debug = False  # set to True to run main_debug instead
if debug:
main_debug(repeat=5, budget=500, init_number=10)
else:
main(["ParEGO", "SMSEGO", "MoeadEGO", "CauMO"], repeat=5, budget=500, init_number=21)
================================================
FILE: demo/comparison/experiment_llvm.py
================================================
import sys
from pathlib import Path
current_dir = Path(__file__).resolve().parent
package_dir = current_dir.parent.parent
sys.path.insert(0, str(package_dir))
import argparse
import json
import os
import numpy as np
from csstuning.compiler.compiler_benchmark import CompilerBenchmarkBase
from transopt.benchmark import instantiate_problems
from transopt.KnowledgeBase.kb_builder import construct_knowledgebase
from transopt.KnowledgeBase.TransferDataHandler import OptTaskDataHandler
from optimizer.construct_optimizer import get_optimizer
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"
os.environ["OMP_NUM_THREADS"] = "1"
def execute_tasks(tasks, args):
kb = construct_knowledgebase(args)
testsuits = instantiate_problems(tasks, args.seed)
optimizer = get_optimizer(args)
data_handler = OptTaskDataHandler(kb, args)
optimizer.optimize(testsuits, data_handler)
def split_into_segments(lst, n):
lst = list(lst)
k, m = divmod(len(lst), n)
return [lst[i * k + min(i, m) : (i + 1) * k + min(i + 1, m)] for i in range(n)]
def get_workloads(workloads, split_index, total_splits=10):
segments = split_into_segments(workloads, total_splits)
if split_index >= len(segments):
raise IndexError("split index out of range")
return segments[split_index]
def load_features(file_path):
with open(file_path, "r") as f:
return json.load(f)
def configure_experiment(workload, features, seed, optimizer_name, exp_path, budget=20, init_number=10):
exp_name = f"llvm_{workload}"
args = argparse.Namespace(
seed=seed,
optimizer=optimizer_name,
budget=budget,
init_number=init_number,
init_method="random",
exp_path=exp_path,
exp_name=exp_name,
verbose=True,
normalize="norm",
acquisition_func="LCB",
)
tasks = {
"LLVM": {
"budget": budget,
"workloads": [workload],
"knobs": features[workload]["top"],
},
}
return tasks, args
def main(optimizers=(), repeat=5, budget=500, init_number=21):
features_file = package_dir / "demo" / "comparison" / "features_by_workload_llvm.json"
features = load_features(features_file)
parser = argparse.ArgumentParser(description="Run optimization experiments")
parser.add_argument("--split_index", type=int, default=0,
help="Index for splitting the workload segments")
args = parser.parse_args()
workloads = get_workloads(features.keys(), args.split_index)
exp_path = Path.cwd() / "experiment_results"
for optimizer_name in optimizers:
for workload in workloads:
for i in range(repeat):
tasks, exp_args = configure_experiment(
workload,
features,
65535 + i,
optimizer_name,
exp_path,
budget,
init_number,
)
execute_tasks(tasks, exp_args)
def main_debug(repeat=1, budget=20, init_number=10):
features_file = package_dir / "demo" / "comparison" / "features_by_workload_llvm.json"
features = load_features(features_file)
parser = argparse.ArgumentParser(description="Run optimization experiments")
parser.add_argument("--split_index", type=int, default=0,
help="Index for splitting the workload segments")
args = parser.parse_args()
workloads = get_workloads(features.keys(), args.split_index)[:1]
exp_path = Path.cwd() / "experiment_results"
for optimizer_name in ["MoeadEGO"]:
for workload in workloads:
for i in range(repeat):
tasks, exp_args = configure_experiment(
workload,
features,
65535 + i,
optimizer_name,
exp_path,
budget,
init_number,
)
execute_tasks(tasks, exp_args)
if __name__ == "__main__":
debug = False  # set to True to run main_debug instead
if debug:
main_debug(repeat=1, budget=20, init_number=11)
else:
main(["ParEGO", "MoeadEGO", "SMSEGO", "CauMO"], repeat=5, budget=500, init_number=21)
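Both experiment scripts set `acquisition_func="LCB"`. As a reminder of what that setting means, a generic lower confidence bound for minimisation can be sketched as below; the `beta` weight of 2.0 is an assumption here, and TransOPT's internal default may differ:

```python
import numpy as np

def lower_confidence_bound(mu, sigma, beta=2.0):
    # LCB for minimisation: trade off low predicted mean (exploitation)
    # against high predictive uncertainty (exploration).
    return np.asarray(mu) - beta * np.asarray(sigma)

# The candidate with the smallest LCB value is evaluated next.
print(lower_confidence_bound([1.0, 0.8], [0.1, 0.5]))
```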
================================================
FILE: demo/comparison/features_by_workload_gcc.json
================================================
{
"cbench-consumer-tiff2bw": {
"common": [
"align-jumps",
"align-labels",
"guess-branch-probability",
"inline-functions",
"align-loops",
"align-functions",
"gcse"
],
"top": [
"align-jumps",
"align-labels",
"guess-branch-probability",
"inline-functions",
"align-loops",
"align-functions",
"gcse",
"tree-ch",
"tree-loop-vectorize",
"vect-cost-model",
"tree-vrp",
"tree-pre",
"schedule-insns2",
"tree-dominator-opts",
"inline-small-functions",
"expensive-optimizations",
"tree-ter",
"code-hoisting",
"ipa-cp",
"forward-propagate"
]
},
"cbench-security-rijndael": {
"common": [
"align-jumps",
"align-loops",
"align-labels",
"align-functions"
],
"top": [
"align-jumps",
"align-loops",
"align-labels",
"align-functions",
"expensive-optimizations",
"gcse",
"schedule-insns2",
"tree-ter",
"guess-branch-probability",
"tree-pre",
"code-hoisting",
"tree-vrp",
"tree-sra",
"dse",
"tree-dominator-opts",
"peel-loops",
"if-conversion",
"tree-fre",
"rerun-cse-after-loop",
"omit-frame-pointer"
]
},
"cbench-security-pgp": {
"common": [
"align-jumps",
"align-loops",
"align-labels",
"align-functions"
],
"top": [
"align-jumps",
"align-loops",
"align-labels",
"align-functions",
"inline-functions",
"inline-small-functions",
"gcse",
"schedule-insns2",
"tree-vrp",
"tree-dominator-opts",
"tree-ccp",
"guess-branch-probability",
"expensive-optimizations",
"tree-ch",
"peel-loops",
"tree-partial-pre",
"tree-loop-vectorize",
"code-hoisting",
"dse",
"caller-saves"
]
},
"polybench-cholesky": {
"common": [
"align-jumps",
"align-loops",
"align-labels",
"align-functions"
],
"top": [
"align-jumps",
"align-loops",
"align-labels",
"align-functions",
"peel-loops",
"tree-ch",
"guess-branch-probability",
"tree-loop-vectorize",
"reorder-blocks-algorithm",
"ipa-cp",
"inline-small-functions",
"unswitch-loops",
"math-errno",
"inline-functions-called-once",
"optimize-strlen",
"tree-vrp",
"partial-inlining",
"reorder-blocks-and-partition",
"ipa-icf-functions",
"associative-math"
]
},
"cbench-telecom-crc32": {
"common": [
"align-jumps",
"inline-small-functions",
"align-labels",
"inline-functions",
"align-loops",
"align-functions",
"tree-ch"
],
"top": [
"align-jumps",
"inline-small-functions",
"align-labels",
"inline-functions",
"align-loops",
"align-functions",
"tree-ch",
"guess-branch-probability",
"omit-frame-pointer",
"schedule-insns2",
"expensive-optimizations",
"tree-vrp",
"caller-saves",
"gcse",
"tree-dominator-opts",
"cx-limited-range",
"compare-elim",
"tree-pre",
"split-loops",
"reorder-functions"
]
},
"polybench-fdtd-apml": {
"common": [
"align-jumps",
"tree-ccp",
"align-labels",
"align-loops",
"align-functions",
"tree-ch"
],
"top": [
"align-jumps",
"tree-ccp",
"align-labels",
"align-loops",
"align-functions",
"tree-ch",
"unsafe-math-optimizations",
"tree-pre",
"tree-fre",
"gcse",
"guess-branch-probability",
"inline-functions-called-once",
"omit-frame-pointer",
"code-hoisting",
"tree-dominator-opts",
"tree-vrp",
"tree-loop-vectorize",
"gcse-after-reload",
"move-loop-invariants",
"hoist-adjacent-loads"
]
},
"cbench-network-patricia": {
"common": [
"align-jumps",
"align-labels",
"guess-branch-probability",
"align-loops",
"align-functions",
"split-loops",
"vect-cost-model"
],
"top": [
"align-jumps",
"align-labels",
"guess-branch-probability",
"align-loops",
"align-functions",
"split-loops",
"vect-cost-model",
"inline-functions",
"inline-small-functions",
"optimize-strlen",
"tree-vrp",
"gcse",
"schedule-insns2",
"tree-copy-prop",
"reorder-blocks",
"tree-dominator-opts",
"reorder-blocks-and-partition",
"tree-pta",
"tree-ch",
"if-conversion"
]
},
"cbench-consumer-tiff2rgba": {
"common": [
"align-jumps",
"align-labels",
"guess-branch-probability",
"align-loops",
"align-functions"
],
"top": [
"align-jumps",
"align-labels",
"guess-branch-probability",
"align-loops",
"align-functions",
"tree-ch",
"tree-loop-vectorize",
"vect-cost-model",
"tree-pre",
"schedule-insns2",
"tree-vrp",
"gcse",
"tree-dominator-opts",
"inline-small-functions",
"tree-ter",
"inline-functions",
"expensive-optimizations",
"tree-pta",
"omit-frame-pointer",
"code-hoisting"
]
},
"polybench-symm": {
"common": [
"align-jumps",
"align-labels",
"align-loops",
"align-functions",
"tree-ch",
"peel-loops"
],
"top": [
"align-jumps",
"align-labels",
"align-loops",
"align-functions",
"tree-ch",
"peel-loops",
"tree-dominator-opts",
"tree-vrp",
"schedule-insns2",
"gcse",
"guess-branch-probability",
"inline-functions-called-once",
"inline-functions",
"inline-small-functions",
"expensive-optimizations",
"vect-cost-model",
"tree-fre",
"ipa-cp",
"ssa-phiopt",
"tree-copy-prop"
]
},
"cbench-automotive-susan-e": {
"common": [
"align-jumps",
"align-labels",
"guess-branch-probability",
"align-loops",
"align-functions"
],
"top": [
"align-jumps",
"align-labels",
"guess-branch-probability",
"align-loops",
"align-functions",
"cprop-registers",
"tree-vrp",
"tree-ch",
"schedule-insns2",
"gcse",
"tree-ter",
"code-hoisting",
"math-errno",
"tree-pre",
"expensive-optimizations",
"move-loop-invariants",
"tree-dominator-opts",
"caller-saves",
"unswitch-loops",
"dse"
]
},
"cbench-telecom-adpcm-d": {
"common": [
"align-jumps",
"align-loops",
"align-labels",
"align-functions"
],
"top": [
"align-jumps",
"align-loops",
"align-labels",
"align-functions",
"if-conversion",
"ssa-phiopt",
"guess-branch-probability",
"dce",
"schedule-insns2",
"tree-switch-conversion",
"tree-builtin-call-dce",
"tree-dominator-opts",
"peel-loops",
"predictive-commoning",
"vect-cost-model",
"tree-loop-vectorize",
"shrink-wrap",
"code-hoisting",
"math-errno",
"ipa-reference"
]
},
"polybench-ludcmp": {
"common": [
"align-jumps",
"tree-ccp",
"tree-loop-vectorize",
"inline-functions-called-once",
"align-labels",
"guess-branch-probability",
"align-loops",
"align-functions",
"tree-ch",
"peel-loops"
],
"top": [
"align-jumps",
"tree-ccp",
"tree-loop-vectorize",
"inline-functions-called-once",
"align-labels",
"guess-branch-probability",
"align-loops",
"align-functions",
"tree-ch",
"peel-loops",
"tree-vrp",
"schedule-insns2",
"tree-dominator-opts",
"inline-small-functions",
"reorder-blocks-algorithm",
"ipa-cp",
"gcse",
"reorder-blocks-and-partition",
"tree-pre",
"tree-dce"
]
},
"polybench-lu": {
"common": [
"align-jumps",
"tree-loop-vectorize",
"align-labels",
"guess-branch-probability",
"align-loops",
"align-functions",
"peel-loops"
],
"top": [
"align-jumps",
"tree-loop-vectorize",
"align-labels",
"guess-branch-probability",
"align-loops",
"align-functions",
"peel-loops",
"tree-ch",
"tree-vrp",
"schedule-insns2",
"gcse",
"tree-fre",
"inline-small-functions",
"ipa-cp",
"inline-functions-called-once",
"tree-dominator-opts",
"reorder-blocks-algorithm",
"tree-pre",
"reorder-blocks",
"code-hoisting"
]
},
"cbench-consumer-mad": {
"common": [
"align-jumps",
"align-labels",
"tree-vrp",
"align-loops",
"tree-pre",
"align-functions",
"tree-pta",
"vect-cost-model"
],
"top": [
"align-jumps",
"align-labels",
"tree-vrp",
"align-loops",
"tree-pre",
"align-functions",
"tree-pta",
"vect-cost-model",
"guess-branch-probability",
"if-conversion",
"optimize-sibling-calls",
"tree-slsr",
"shrink-wrap",
"reorder-blocks-and-partition",
"crossjumping",
"version-loops-for-strides",
"ipa-icf",
"compare-elim",
"lra-remat",
"ipa-sra"
]
},
"cbench-automotive-qsort1": {
"common": [
"align-jumps",
"tree-loop-vectorize",
"inline-small-functions",
"align-labels",
"guess-branch-probability",
"align-loops",
"align-functions"
],
"top": [
"align-jumps",
"tree-loop-vectorize",
"inline-small-functions",
"align-labels",
"guess-branch-probability",
"align-loops",
"align-functions",
"tree-ch",
"tree-dominator-opts",
"peel-loops",
"schedule-insns2",
"tree-vrp",
"inline-functions",
"partial-inlining",
"gcse",
"ssa-phiopt",
"inline-functions-called-once",
"vect-cost-model",
"move-loop-invariants",
"tree-tail-merge"
]
},
"polybench-bicg": {
"common": [
"align-jumps",
"align-labels",
"align-loops",
"vect-cost-model",
"align-functions",
"peel-loops"
],
"top": [
"align-jumps",
"align-labels",
"align-loops",
"vect-cost-model",
"align-functions",
"peel-loops",
"inline-small-functions",
"ipa-cp",
"guess-branch-probability",
"tree-tail-merge",
"optimize-strlen",
"inline-functions-called-once",
"tree-ch",
"tree-vrp",
"tree-coalesce-vars",
"tree-loop-distribute-patterns",
"optimize-sibling-calls",
"forward-propagate",
"omit-frame-pointer",
"tree-ter"
]
},
"cbench-security-sha": {
"common": [
"align-jumps",
"align-labels",
"align-loops",
"align-functions",
"tree-ch"
],
"top": [
"align-jumps",
"align-labels",
"align-loops",
"align-functions",
"tree-ch",
"tree-loop-vectorize",
"tree-vrp",
"tree-dominator-opts",
"schedule-insns2",
"guess-branch-probability",
"ipa-ra",
"gcse",
"ipa-sra",
"tree-pre",
"predictive-commoning",
"expensive-optimizations",
"tree-slp-vectorize",
"reciprocal-math",
"vect-cost-model",
"inline-small-functions"
]
},
"cbench-consumer-jpeg-d": {
"common": [
"align-jumps",
"align-loops",
"align-labels",
"align-functions"
],
"top": [
"align-jumps",
"align-loops",
"align-labels",
"align-functions",
"math-errno",
"inline-small-functions",
"gcse-after-reload",
"guess-branch-probability",
"lra-remat",
"tree-slsr",
"thread-jumps",
"tree-sra",
"combine-stack-adjustments",
"forward-propagate",
"version-loops-for-strides",
"cx-limited-range",
"merge-constants",
"associative-math",
"tree-loop-vectorize",
"reorder-blocks"
]
},
"cbench-telecom-adpcm-c": {
"common": [
"align-jumps",
"align-labels",
"align-loops",
"align-functions",
"vect-cost-model"
],
"top": [
"align-jumps",
"align-labels",
"align-loops",
"align-functions",
"vect-cost-model",
"if-conversion2",
"ssa-phiopt",
"guess-branch-probability",
"if-conversion",
"move-loop-invariants",
"inline-small-functions",
"isolate-erroneous-paths-dereference",
"defer-pop",
"cprop-registers",
"omit-frame-pointer",
"ipa-cp",
"dce",
"signed-zeros",
"ipa-sra",
"tree-builtin-call-dce"
]
},
"cbench-telecom-gsm": {
"common": [
"align-jumps",
"align-labels",
"align-loops",
"align-functions",
"peel-loops"
],
"top": [
"align-jumps",
"align-labels",
"align-loops",
"align-functions",
"peel-loops",
"tree-loop-vectorize",
"predictive-commoning",
"tree-dominator-opts",
"tree-ch",
"tree-vrp",
"tree-pre",
"guess-branch-probability",
"ssa-phiopt",
"if-conversion",
"math-errno",
"optimize-strlen",
"unswitch-loops",
"inline-functions-called-once",
"caller-saves",
"merge-constants"
]
}
}
================================================
FILE: demo/comparison/features_by_workload_gcc_extra.json
================================================
{
"cbench-automotive-bitcount": {
"common": [
"align-labels",
"tree-ter",
"align-functions",
"align-loops",
"align-jumps"
],
"top": [
"align-labels",
"tree-ter",
"align-functions",
"align-loops",
"align-jumps",
"tree-ch",
"optimize-sibling-calls",
"guess-branch-probability",
"peephole2",
"reorder-blocks-algorithm",
"reorder-blocks",
"reorder-blocks-and-partition",
"gcse",
"tree-vrp",
"expensive-optimizations",
"tree-dce",
"schedule-insns2",
"tree-fre",
"split-loops",
"omit-frame-pointer"
]
},
"cbench-automotive-susan-c": {
"common": [
"align-labels",
"guess-branch-probability",
"align-functions",
"align-loops",
"align-jumps"
],
"top": [
"align-labels",
"guess-branch-probability",
"align-functions",
"align-loops",
"align-jumps",
"cprop-registers",
"tree-vrp",
"schedule-insns2",
"tree-ch",
"gcse",
"tree-dominator-opts",
"tree-pre",
"expensive-optimizations",
"reorder-blocks-algorithm",
"tree-ter",
"code-hoisting",
"tree-fre",
"predictive-commoning",
"reorder-blocks-and-partition",
"move-loop-invariants"
]
},
"cbench-consumer-tiff2dither": {
"common": [
"align-labels",
"align-functions",
"align-loops",
"align-jumps"
],
"top": [
"align-labels",
"align-functions",
"align-loops",
"align-jumps",
"reorder-blocks-algorithm",
"vect-cost-model",
"inline-functions-called-once",
"hoist-adjacent-loads",
"guess-branch-probability",
"inline-functions",
"ipa-ra",
"reciprocal-math",
"tree-ccp",
"ipa-sra",
"optimize-strlen",
"split-paths",
"reorder-functions",
"caller-saves",
"tree-builtin-call-dce",
"tree-vrp"
]
},
"cbench-office-stringsearch2": {
"common": [
"align-labels",
"guess-branch-probability",
"inline-functions",
"align-functions",
"align-loops",
"align-jumps",
"inline-small-functions"
],
"top": [
"align-labels",
"guess-branch-probability",
"inline-functions",
"align-functions",
"align-loops",
"align-jumps",
"inline-small-functions",
"ipa-pure-const",
"tree-dominator-opts",
"tree-pre",
"schedule-insns2",
"tree-vrp",
"gcse",
"tree-ch",
"partial-inlining",
"expensive-optimizations",
"tree-ccp",
"tree-fre",
"dse",
"reorder-blocks-algorithm"
]
},
"polybench-2mm": {
"common": [
"align-labels",
"align-loops",
"peel-loops",
"ipa-cp",
"align-jumps",
"tree-ch"
],
"top": [
"align-labels",
"align-loops",
"peel-loops",
"ipa-cp",
"align-jumps",
"tree-ch",
"tree-vrp",
"schedule-insns2",
"align-functions",
"tree-dominator-opts",
"predictive-commoning",
"inline-functions-called-once",
"gcse",
"tree-pre",
"guess-branch-probability",
"inline-small-functions",
"tree-fre",
"tree-partial-pre",
"tree-ccp",
"tree-loop-vectorize"
]
},
"polybench-3mm": {
"common": [
"align-labels",
"tree-dominator-opts",
"align-functions",
"align-loops",
"peel-loops",
"tree-vrp",
"align-jumps",
"tree-ch"
],
"top": [
"align-labels",
"tree-dominator-opts",
"align-functions",
"align-loops",
"peel-loops",
"tree-vrp",
"align-jumps",
"tree-ch",
"ipa-cp",
"schedule-insns2",
"predictive-commoning",
"inline-functions-called-once",
"tree-pre",
"inline-small-functions",
"gcse",
"guess-branch-probability",
"tree-fre",
"tree-partial-pre",
"tree-ccp",
"tree-dce"
]
},
"polybench-adi": {
"common": [
"align-labels",
"guess-branch-probability",
"tree-dominator-opts",
"inline-functions",
"align-loops",
"peel-loops",
"tree-loop-vectorize",
"ipa-cp",
"align-jumps",
"tree-ch",
"inline-small-functions"
],
"top": [
"align-labels",
"guess-branch-probability",
"tree-dominator-opts",
"inline-functions",
"align-loops",
"peel-loops",
"tree-loop-vectorize",
"ipa-cp",
"align-jumps",
"tree-ch",
"inline-small-functions",
"ipa-cp-clone",
"tree-vrp",
"tree-fre",
"align-functions",
"tree-ccp",
"code-hoisting",
"schedule-insns2",
"tree-pre",
"gcse"
]
},
"polybench-atax": {
"common": [
"align-labels",
"align-functions",
"align-loops",
"peel-loops",
"align-jumps",
"tree-ch",
"inline-small-functions"
],
"top": [
"align-labels",
"align-functions",
"align-loops",
"peel-loops",
"align-jumps",
"tree-ch",
"inline-small-functions",
"tree-loop-vectorize",
"tree-pre",
"schedule-insns2",
"tree-vrp",
"tree-dominator-opts",
"ipa-cp",
"gcse",
"predictive-commoning",
"inline-functions-called-once",
"guess-branch-probability",
"tree-partial-pre",
"optimize-strlen",
"tree-fre"
]
},
"polybench-doitgen": {
"common": [
"align-labels",
"align-functions",
"align-loops",
"align-jumps"
],
"top": [
"align-labels",
"align-functions",
"align-loops",
"align-jumps",
"peel-loops",
"tree-pre",
"tree-dominator-opts",
"predictive-commoning",
"gcse",
"guess-branch-probability",
"tree-vrp",
"tree-fre",
"tree-ch",
"tree-partial-pre",
"vect-cost-model",
"code-hoisting",
"inline-functions-called-once",
"inline-small-functions",
"tree-builtin-call-dce",
"tree-dce"
]
},
"polybench-durbin": {
"common": [
"align-labels",
"align-functions",
"align-loops",
"peel-loops",
"align-jumps"
],
"top": [
"align-labels",
"align-functions",
"align-loops",
"peel-loops",
"align-jumps",
"predictive-commoning",
"tree-vrp",
"tree-ch",
"tree-loop-vectorize",
"tree-pre",
"guess-branch-probability",
"ipa-cp",
"inline-small-functions",
"gcse",
"schedule-insns2",
"inline-functions-called-once",
"reciprocal-math",
"indirect-inlining",
"devirtualize",
"auto-inc-dec"
]
},
"polybench-fdtd-2d": {
"common": [
"align-labels",
"guess-branch-probability",
"align-functions",
"align-loops",
"peel-loops",
"tree-loop-vectorize",
"align-jumps"
],
"top": [
"align-labels",
"guess-branch-probability",
"align-functions",
"align-loops",
"peel-loops",
"tree-loop-vectorize",
"align-jumps",
"ipa-cp",
"tree-ch",
"tree-vrp",
"inline-functions-called-once",
"tree-dominator-opts",
"inline-small-functions",
"schedule-insns2",
"tree-fre",
"gcse",
"code-hoisting",
"tree-pre",
"tree-ccp",
"ipa-icf-variables"
]
},
"polybench-fdtd-apml": {
"common": [
"align-labels",
"align-functions",
"align-loops",
"align-jumps",
"tree-ch"
],
"top": [
"align-labels",
"align-functions",
"align-loops",
"align-jumps",
"tree-ch",
"unsafe-math-optimizations",
"tree-pre",
"tree-fre",
"gcse",
"guess-branch-probability",
"inline-functions-called-once",
"code-hoisting",
"tree-dominator-opts",
"omit-frame-pointer",
"tree-loop-vectorize",
"move-loop-invariants",
"peephole2",
"inline-small-functions",
"ipa-cp",
"store-merging"
]
},
"polybench-gemver": {
"common": [
"align-labels",
"align-functions",
"align-loops",
"peel-loops",
"tree-loop-vectorize",
"align-jumps"
],
"top": [
"align-labels",
"align-functions",
"align-loops",
"peel-loops",
"tree-loop-vectorize",
"align-jumps",
"tree-pre",
"inline-small-functions",
"tree-vrp",
"tree-dominator-opts",
"ipa-cp",
"guess-branch-probability",
"tree-ch",
"predictive-commoning",
"inline-functions-called-once",
"gcse",
"tree-fre",
"dse",
"partial-inlining",
"combine-stack-adjustments"
]
},
"polybench-gesummv": {
"common": [
"align-labels",
"align-functions",
"align-loops",
"peel-loops",
"align-jumps"
],
"top": [
"align-labels",
"align-functions",
"align-loops",
"peel-loops",
"align-jumps",
"unsafe-math-optimizations",
"guess-branch-probability",
"inline-small-functions",
"schedule-insns2",
"tree-dominator-opts",
"tree-vrp",
"gcse",
"inline-functions-called-once",
"tree-ch",
"ipa-cp",
"vect-cost-model",
"dce",
"ipa-icf",
"gcse-after-reload",
"tree-ter"
]
}
}
================================================
FILE: demo/comparison/features_by_workload_llvm.json
================================================
{
"cbench-telecom-gsm": {
"common": [
"early-cse",
"gvn",
"instcombine",
"jump-threading"
],
"top": [
"early-cse",
"gvn",
"instcombine",
"jump-threading",
"sroa",
"mem2reg",
"licm",
"inject-tli-mappings",
"early-cse-memssa",
"loop-unroll",
"loop-vectorize",
"transform-warning",
"libcalls-shrinkwrap",
"adce",
"indvars",
"loop-sink",
"callsite-splitting",
"globalopt",
"loop-rotate",
"speculative-execution"
]
},
"cbench-automotive-qsort1": {
"common": [
"instcombine",
"block-freq"
],
"top": [
"instcombine",
"block-freq",
"globalopt",
"ipsccp",
"gvn",
"licm",
"sroa",
"loop-rotate",
"mem2reg",
"indvars",
"loop-vectorize",
"function-attrs",
"loop-unroll",
"early-cse-memssa",
"sccp",
"lazy-block-freq",
"always-inline",
"strip-dead-prototypes",
"bdce",
"domtree"
]
},
"cbench-automotive-susan-e": {
"common": [
"loop-rotate",
"gvn",
"early-cse-memssa",
"instcombine",
"loop-unroll",
"early-cse",
"sroa",
"licm",
"mem2reg"
],
"top": [
"loop-rotate",
"gvn",
"early-cse-memssa",
"instcombine",
"loop-unroll",
"early-cse",
"sroa",
"licm",
"mem2reg",
"slp-vectorizer",
"simplifycfg",
"loop-vectorize",
"tbaa",
"tailcallelim",
"function-attrs",
"instsimplify",
"reassociate",
"always-inline",
"float2int",
"dse"
]
},
"cbench-consumer-tiff2rgba": {
"common": [
"loop-rotate",
"gvn",
"early-cse-memssa",
"instcombine",
"sroa",
"licm",
"mem2reg"
],
"top": [
"loop-rotate",
"gvn",
"early-cse-memssa",
"instcombine",
"sroa",
"licm",
"mem2reg",
"slp-vectorizer",
"loop-vectorize",
"loop-unroll",
"early-cse",
"indvars",
"dse",
"globalopt",
"jump-threading",
"loop-distribute",
"memoryssa",
"loop-accesses",
"prune-eh",
"aggressive-instcombine"
]
},
"cbench-network-patricia": {
"common": [
"instcombine"
],
"top": [
"instcombine",
"ipsccp",
"aggressive-instcombine",
"gvn",
"globalopt",
"loop-vectorize",
"licm",
"sroa",
"mem2reg",
"simplifycfg",
"loop-rotate",
"function-attrs",
"jump-threading",
"called-value-propagation",
"early-cse-memssa",
"dse",
"indvars",
"postdomtree",
"inject-tli-mappings",
"adce"
]
},
"cbench-automotive-bitcount": {
"common": [
"loop-rotate",
"gvn",
"licm"
],
"top": [
"loop-rotate",
"gvn",
"licm",
"globalopt",
"mem2reg",
"jump-threading",
"sroa",
"instcombine",
"simplifycfg",
"speculative-execution",
"indvars",
"loop-unroll",
"scoped-noalias-aa",
"early-cse-memssa",
"adce",
"ipsccp",
"lazy-branch-prob",
"slp-vectorizer",
"postdomtree",
"dse"
]
},
"cbench-bzip2": {
"common": [
"loop-rotate",
"gvn",
"early-cse-memssa",
"instcombine",
"loop-unroll",
"sroa",
"licm",
"mem2reg"
],
"top": [
"loop-rotate",
"gvn",
"early-cse-memssa",
"instcombine",
"loop-unroll",
"sroa",
"licm",
"mem2reg",
"slp-vectorizer",
"loop-vectorize",
"early-cse",
"indvars",
"jump-threading",
"dse",
"loop-accesses",
"loop-instsimplify",
"scoped-noalias-aa",
"lazy-block-freq",
"memcpyopt",
"always-inline"
]
},
"cbench-consumer-tiff2bw": {
"common": [
"loop-rotate",
"gvn",
"instcombine",
"sroa",
"licm",
"jump-threading",
"mem2reg"
],
"top": [
"loop-rotate",
"gvn",
"instcombine",
"sroa",
"licm",
"jump-threading",
"mem2reg",
"slp-vectorizer",
"loop-vectorize",
"early-cse-memssa",
"loop-unroll",
"alignment-from-assumptions",
"function-attrs",
"correlated-propagation",
"scoped-noalias-aa",
"openmp-opt-cgscc",
"postdomtree",
"prune-eh",
"lcssa",
"lazy-block-freq"
]
},
"cbench-consumer-jpeg-d": {
"common": [
"loop-rotate",
"gvn",
"early-cse-memssa",
"instcombine",
"sroa",
"licm",
"mem2reg"
],
"top": [
"loop-rotate",
"gvn",
"early-cse-memssa",
"instcombine",
"sroa",
"licm",
"mem2reg",
"loop-vectorize",
"loop-unroll",
"indvars",
"dse",
"function-attrs",
"transform-warning",
"slp-vectorizer",
"alignment-from-assumptions",
"called-value-propagation",
"callsite-splitting",
"loops",
"float2int",
"elim-avail-extern"
]
},
"cbench-telecom-adpcm-c": {
"common": [],
"top": [
"globalopt",
"gvn",
"memcpyopt",
"mem2reg",
"strip-dead-prototypes",
"simplifycfg",
"licm",
"lazy-block-freq",
"loop-instsimplify",
"sroa",
"elim-avail-extern",
"instcombine",
"libcalls-shrinkwrap",
"reassociate",
"globaldce",
"loop-rotate",
"loop-vectorize",
"ipsccp",
"globals-aa",
"function-attrs"
]
},
"cbench-telecom-adpcm-d": {
"common": [
"instcombine",
"callsite-splitting"
],
"top": [
"instcombine",
"callsite-splitting",
"globalopt",
"mem2reg",
"gvn",
"simplifycfg",
"licm",
"sroa",
"loop-unroll",
"loop-rotate",
"loop-distribute",
"indvars",
"early-cse-memssa",
"ipsccp",
"phi-values",
"scoped-noalias-aa",
"alignment-from-assumptions",
"jump-threading",
"rpo-function-attrs",
"loop-simplifycfg"
]
},
"cbench-office-stringsearch2": {
"common": [
"instcombine",
"libcalls-shrinkwrap"
],
"top": [
"instcombine",
"libcalls-shrinkwrap",
"reassociate",
"licm",
"globalopt",
"ipsccp",
"function-attrs",
"inferattrs",
"early-cse",
"gvn",
"phi-values",
"simplifycfg",
"early-cse-memssa",
"loop-rotate",
"mem2reg",
"sroa",
"callsite-splitting",
"rpo-function-attrs",
"inject-tli-mappings",
"loop-load-elim"
]
},
"cbench-security-rijndael": {
"common": [
"loop-rotate",
"globalopt",
"gvn",
"instcombine",
"branch-prob",
"slp-vectorizer",
"globaldce",
"aggressive-instcombine",
"simplifycfg",
"loop-unroll",
"called-value-propagation",
"deadargelim",
"sroa",
"vector-combine",
"memoryssa",
"loop-vectorize"
],
"top": [
"loop-rotate",
"globalopt",
"gvn",
"instcombine",
"branch-prob",
"slp-vectorizer",
"globaldce",
"aggressive-instcombine",
"simplifycfg",
"loop-unroll",
"called-value-propagation",
"deadargelim",
"sroa",
"vector-combine",
"memoryssa",
"loop-vectorize",
"loop-simplifycfg",
"function-attrs",
"loop-distribute",
"licm"
]
},
"cbench-security-sha": {
"common": [
"div-rem-pairs",
"correlated-propagation"
],
"top": [
"div-rem-pairs",
"correlated-propagation",
"instcombine",
"globalopt",
"gvn",
"ipsccp",
"sroa",
"licm",
"mem2reg",
"loop-rotate",
"early-cse-memssa",
"function-attrs",
"strip-dead-prototypes",
"block-freq",
"indvars",
"loop-unroll",
"lcssa",
"loop-simplifycfg",
"loop-vectorize",
"branch-prob"
]
}
}
================================================
FILE: demo/comparison/plot.py
================================================
import json
import sys
from pathlib import Path
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.ticker import MultipleLocator
current_path = Path(__file__).resolve().parent
package_path = current_path.parent.parent
sys.path.insert(0, str(package_path))
pngs_path = package_path / "demo/comparison/pngs"
def create_plots(data, file_name, format="pdf"):
mpl.rcParams["font.family"] = ["serif"]
mpl.rcParams["font.serif"] = ["Times New Roman"]
# Plot settings
fig = plt.figure(figsize=(20, 8))
# Titles for subplots
titles = ["ParEGO", "SMS-EGO", "MOEA/D-EGO", "Ours"]
data[0], data[2] = data[2], data[0]
global_min = np.min([np.min(d, axis=0) for d in data], axis=0)
global_max = np.max([np.max(d, axis=0) for d in data], axis=0)
for i, d in enumerate(data):
ax = fig.add_subplot(1, 4, i + 1, projection='3d', proj_type='ortho')
ax.scatter(d[:, 0], d[:, 1], d[:, 2], facecolors='none', edgecolors='#304F9E', s=50, linewidths=1)
ax.text2D(0.85, 0.85, titles[i], transform=ax.transAxes, fontsize=14,
verticalalignment='center', horizontalalignment='center',
bbox=dict(facecolor='white', alpha=0.5, boxstyle="round,pad=0.3"))
ax.view_init(elev=20, azim=-45)
# Set the background of each axis to be transparent
ax.xaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
ax.yaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
ax.zaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))
ax.set_xlim(global_min[0], global_max[0])
ax.set_ylim(global_min[1], global_max[1])
ax.set_zlim(global_min[2], global_max[2])
ax.tick_params(labelsize=14)
# Save the plot as a file
# plt.savefig(Path(pngs_path) / f"{file_name}.png", format="png", bbox_inches="tight")
plt.savefig(Path(pngs_path) / f"{file_name}.{format}", format=format, bbox_inches="tight")
plt.close(fig)
def load_data(workload, algorithm, seed):
if target == "llvm":
result_file = llvm_results / f"llvm_{workload}" / algorithm / f"{seed}_KB.json"
else:
result_file = gcc_results / f"gcc_{workload}" / algorithm / f"{seed}_KB.json"
df = load_and_prepare_data(result_file)
return df
def load_and_prepare_data(file_path):
"""
Loads JSON data and prepares a DataFrame.
"""
with open(file_path, "r") as f:
data = json.load(f)
if "1" in data:
data = data["1"]
input_vectors = data["input_vector"]
output_vectors = data["output_value"]
df_input = pd.DataFrame(input_vectors)
df_output = pd.DataFrame(output_vectors)[objectives]
df_combined = pd.concat([df_input, df_output], axis=1)
df_combined = df_combined.drop_duplicates(subset=df_input.columns.tolist())
for obj in objectives:
df_combined = df_combined[df_combined[obj] != 1e10]
return df_combined
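# The loader above expects result JSON of roughly this shape (field names
# "1", "input_vector", and "output_value" come from the code above; the
# parameter names and values shown here are purely illustrative):
#
# {
#   "1": {
#     "input_vector": [{"param_a": 0, "param_b": 1}, ...],
#     "output_value": [{"execution_time": 1.2, "file_size": 3400,
#                       "compilation_time": 0.8}, ...]
#   }
# }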
def get_data_ranges(data):
return {
'min': np.min([np.min(d, axis=0) for d in data], axis=0),
'max': np.max([np.max(d, axis=0) for d in data], axis=0)
}
def rescale_data(data, original_range, target_range):
    # Normalize to [0, 1]
data_normalized = (data - original_range[0]) / (original_range[1] - original_range[0])
    # Scale into the target range
data_rescaled = data_normalized * (target_range[1] - target_range[0]) + target_range[0]
return data_rescaled
def map_data_to_mysql_ranges(data, gcc_llvm_range, mysql_range):
    # data is assumed to be an n-by-3 array whose columns are throughput, latency, and CPU usage
data_mapped = np.copy(data)
for i, key in enumerate(['throughput', 'latency', 'cpu_usage']):
original_range = (np.min(gcc_llvm_range[key]), np.max(gcc_llvm_range[key]))
target_range = mysql_range[key]
data_mapped[:, i] = rescale_data(data[:, i], original_range, target_range)
return data_mapped
def invert_mapping(value, min_val, max_val):
    # Invert the mapping so that low values become high and high values become low
return max_val - (value - min_val)
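# Worked example of the two mapping helpers above, with purely illustrative
# numbers (the arithmetic is restated inline so this check is self-contained):
# rescale_data first normalizes a value into [0, 1] and then scales it into
# the target range; invert_mapping reflects a value within its original range.
_x = 2.0
_rescaled = (_x - 0.0) / (10.0 - 0.0) * (100.0 - 0.0) + 0.0  # rescale_data(_x, (0, 10), (0, 100))
assert _rescaled == 20.0
_inverted = 10.0 - (_x - 0.0)  # invert_mapping(_x, 0.0, 10.0)
assert _inverted == 8.0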
workloads_improved = [
"cbench-telecom-gsm",
"cbench-automotive-qsort1",
"cbench-automotive-susan-e",
"cbench-consumer-tiff2rgba",
"cbench-network-patricia",
"cbench-consumer-tiff2bw",
"cbench-consumer-jpeg-d",
"cbench-telecom-adpcm-c",
"cbench-security-rijndael",
"cbench-security-sha",
]
results_path = package_path / "experiment_results"
gcc_results = results_path / "gcc_comparsion"
llvm_results = results_path / "llvm_comparsion"
algorithm_list = ["ParEGO", "SMSEGO", "MoeadEGO", "CauMO"]
objectives = ["execution_time", "file_size", "compilation_time"]
mysql_objs = ["throughput", "latency", "cpu_usage"]
seed_list = [65535, 65536, 65537, 65538, 65539]
mysql_ranges = {
'voter': {'throughput_range': (0, 8000), 'latency_range': (0, 130000), 'cpu_usage_range': (0, 0.2)},
'sibench': {'throughput_range': (0, 17500), 'latency_range': (0, 300000), 'cpu_usage_range': (0, 0.4)},
'smallbank': {'throughput_range': (0, 10000), 'latency_range': (0, 500000), 'cpu_usage_range': (0, 0.6)},
'tatp': {'throughput_range': (0, 21000), 'latency_range': (0, 50000), 'cpu_usage_range': (0, 1.0)},
'twitter': {'throughput_range': (0, 13000), 'latency_range': (0, 60000), 'cpu_usage_range': (0, 1.2)},
'tpcc': {'throughput_range': (0, 1450), 'latency_range': (0, 500000), 'cpu_usage_range': (0, 2.0)}
}
out_format = "pdf"
target = "llvm"
# Debug override: restrict the run to a single workload and seed
workloads_improved = ["cbench-consumer-tiff2bw"]
seed_list = [65539]
# out_format = "png"
for seed in seed_list:
try:
for workload in workloads_improved:
data_for_plotting = []
for algorithm in algorithm_list:
df = load_data(workload, algorithm, seed)
                # Use the raw objective values; they are rescaled to the MySQL ranges below
data_for_plotting.append(df[objectives].to_numpy())
            # Strip the "cbench-" prefix to get the short workload name
workload = workload[7:]
gcc_llvm_ranges = get_data_ranges(data_for_plotting)
gcc_llvm_min, gcc_llvm_max = gcc_llvm_ranges['min'], gcc_llvm_ranges['max']
for i in range(len(data_for_plotting)):
            # Index 0 holds throughput; invert its mapping, because lower
            # GCC/LLVM objective values indicate better performance, whereas
            # for MySQL a higher throughput is better
data_for_plotting[i][:, 0] = np.array([
invert_mapping(x, gcc_llvm_ranges['min'][0], gcc_llvm_ranges['max'][0])
for x in data_for_plotting[i][:, 0]
])
for i in range(len(data_for_plotting)):
for j, obj in enumerate(mysql_objs):
original_min = gcc_llvm_min[j]
original_max = gcc_llvm_max[j]
target_min = mysql_ranges['tatp'][f'{obj}_range'][0]
target_max = mysql_ranges['tatp'][f'{obj}_range'][1]
data_for_plotting[i][:, j] = rescale_data(
data_for_plotting[i][:, j],
(original_min, original_max),
(target_min, target_max)
)
create_plots(data_for_plotting, f"{target}_{workload}_{seed}", out_format)
except Exception as e:
print(f"Error: {e}")
continue
# # Usage example
# np.random.seed(0) # For reproducibility
# # data = [np.random.rand(500, 3) * 1000 for _ in range(4)]
# create_plots(df[objectives].to_numpy(), "optimization_evaluation")
# # Create synthetic data for different algorithms for each workload
# num_points = 500
# workloads = ["voter", "sibench", "smallbank", "tatp", "twitter", "tpcc"]
# def skewed_beta(a, b, min_value, max_value, n_points, skew_factor=5):
# """
# Generate beta distributed data points with a skew towards one of the extremes.
# skew_factor > 1 will skew towards the max_value, otherwise towards min_value.
# """
# data = np.random.beta(a, b, n_points)
# if skew_factor > 1:
# return data**skew_factor * (max_value - min_value) + min_value
# else:
# return (1 - data**skew_factor) * (max_value - min_value) + min_value
# def generate_data_points(n_points, workload_ranges):
# """
# Generate synthetic data for different algorithms for each workload with a tendency to cluster around (0,0,x)
# For 'our' method, the distribution is more varied to cover more PF.
# """
# all_data = []
# for name, ranges in workload_ranges.items():
# data_for_workloads = []
# for i in range(4): # Four algorithms including 'our' method
# # Heavily skew throughput and latency towards lower values
# throughput_data = skewed_beta(2, 2, ranges['throughput_range'][0], ranges['throughput_range'][1], n_points, skew_factor=0.3)
# latency_data = skewed_beta(2, 2, ranges['latency_range'][0], ranges['latency_range'][1], n_points, skew_factor=0.3)
# # Use a normal distribution for cpu usage but clip to range
# cpu_usage_data = np.random.normal(loc=ranges['cpu_usage_range'][1]/2, scale=ranges['cpu_usage_range'][1]/6, size=n_points)
# cpu_usage_data = np.clip(cpu_usage_data, ranges['cpu_usage_range'][0], ranges['cpu_usage_range'][1])
# if i == 3: # 'our' method should cover more PF
# # Add more variability to 'our' method
# throughput_data = np.random.uniform(ranges['throughput_range'][0], ranges['throughput_range'][1], n_points)
# latency_data = np.random.uniform(ranges['latency_range'][0], ranges['latency_range'][1], n_points)
# data_for_workloads.append(np.column_stack((throughput_data, latency_data, cpu_usage_data)))
# all_data.append(data_for_workloads)
# return all_data
# n_points = 500
# # workloads_data = {
# # 'voter': generate_data_points(n_points, 0, 8000, 0, 130000, 0, 0.2),
# # 'sibench': generate_data_points(n_points, 0, 17500, 0, 300000, 0, 0.4),
# # 'smallbank': generate_data_points(n_points, 0, 10000, 0, 500000, 0, 0.6),
# # 'tatp': generate_data_points(n_points, 0, 21000, 0, 50000, 0, 1.0),
# # 'twitter': generate_data_points(n_points, 0, 13000, 0, 60000, 0, 1.2),
# # 'tpcc': generate_data_points(n_points, 0, 1450, 0, 500000, 0, 2.0)
# # }
workload_ranges = {
'voter': {'throughput_range': (0, 8000), 'latency_range': (0, 130000), 'cpu_usage_range': (0, 0.2)},
'sibench': {'throughput_range': (0, 17500), 'latency_range': (0, 300000), 'cpu_usage_range': (0, 0.4)},
'smallbank': {'throughput_range': (0, 10000), 'latency_range': (0, 500000), 'cpu_usage_range': (0, 0.6)},
'tatp': {'throughput_range': (0, 21000), 'latency_range': (0, 50000), 'cpu_usage_range': (0, 1.0)},
'twitter': {'throughput_range': (0, 13000), 'latency_range': (0, 60000), 'cpu_usage_range': (0, 1.2)},
'tpcc': {'throughput_range': (0, 1450), 'latency_range': (0, 500000), 'cpu_usage_range': (0, 2.0)}
}
# all_data = generate_data_points(500, workload_ranges)
# # all_data = []
# # for _ in range(4):
# # all_data.append(generate_data_points(n_points, 0, 8000, 0, 130000, 0, 0.2))
# for i, workload in enumerate(workloads):
# create_plots(all_data[i], f"mysql_{workload}")
================================================
FILE: demo/comparison/plot_samples_dbms.py
================================================
import sys
from pathlib import Path
current_path = Path(__file__).resolve().parent
package_path = current_path.parent.parent
sys.path.insert(0, str(package_path))
import json
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from matplotlib.animation import FuncAnimation
from mpl_toolkits.mplot3d import Axes3D
from transopt.utils.pareto import calc_hypervolume, find_pareto_front
from transopt.utils.plot import plot3D
results_path = package_path / "experiment_results"
dbms_samples_path = results_path / "dbms_samples"
objectives = ["throughput", "latency"]
def load_and_prepare_data(file_path):
"""
Loads JSON data and prepares a DataFrame.
"""
# print(f"Loading data from {file_path}")
with open(file_path, "r") as f:
data = json.load(f)
if "1" in data:
data = data["1"]
input_vectors = data["input_vector"]
output_vectors = data["output_value"]
df_input = pd.DataFrame(input_vectors)
df_output = pd.DataFrame(output_vectors)[objectives]
df_combined = pd.concat([df_input, df_output], axis=1)
# print(f"Loaded {len(df_combined)} data points")
df_combined = df_combined.drop_duplicates(subset=df_input.columns.tolist())
for obj in objectives:
if obj == "latency":
            df_combined = df_combined[df_combined[obj] > 0]  # Keep only positive latency values
else:
df_combined = df_combined[df_combined[obj] != 1e10] # Original condition
# print(f"Loaded {len(df_combined)} data points, removed {len(df_input) - len(df_combined)} duplicates")
# print()
return df_combined
def load_data(workload):
result_file = dbms_samples_path / f"DBMS_{workload}.json"
df = load_and_prepare_data(result_file)
return df
def plot_pareto_front(workload):
df = load_data(workload)
df_normalized = (df - df.min()) / (df.max() - df.min())
_, pareto_indices = find_pareto_front(df_normalized[objectives].values, return_index=True, obj_type=['max', 'min'])
# Retrieve Pareto points
points = df_normalized.iloc[pareto_indices][objectives]
plt.figure()
plt.title(f"Pareto Front for {workload}")
plt.xlabel(objectives[0])
plt.ylabel(objectives[1])
plt.scatter(points[objectives[0]], points[objectives[1]], c='b', marker='o')
# Save the plot as a file
file_path = package_path / "demo" / "comparison" / "pngs" / f"dbms_pf_{workload}.png"
plt.savefig(file_path)
plt.close() # Close the plot to free memory
def plot_all(workload):
df = load_data(workload)
df_normalized = (df - df.min()) / (df.max() - df.min())
plt.figure()
plt.title(f"All samples for {workload}")
plt.xlabel(objectives[0])
plt.ylabel(objectives[1])
plt.scatter(df_normalized[objectives[0]], df_normalized[objectives[1]], c='b', marker='o')
# Save the plot as a file
file_path = package_path / "demo" / "comparison" / "pngs" / f"dbms_all_{workload}.png"
plt.savefig(file_path)
plt.close() # Close the plot to free memory
if __name__ == "__main__":
workloads_dbms = [
"sibench",
"smallbank",
"tatp",
"tpcc",
"twitter",
"voter"
]
for workload in workloads_dbms:
plot_pareto_front(workload)
plot_all(workload)
================================================
FILE: demo/comparison/start_server.py
================================================
import os
import sys
from pathlib import Path
# Define the current and package paths
current_path = Path(__file__).resolve().parent
package_path = current_path.parent.parent
sys.path.insert(0, str(package_path))
# Define the HTML directory
html_dir = package_path / "demo" / "comparison" / "htmls"
# Function to generate index.html
def generate_index_html():
    with open(html_dir / 'index.html', 'w') as index_file:
        index_file.write('<html>\n<body>\n')
        index_file.write('<h1>List of HTML files</h1>\n')
        index_file.write('<ul>\n')
        # Loop through each html file in the directory
        for html_file in html_dir.glob('*.html'):
            link = html_file.name
            # Exclude index.html from the list
            if link != 'index.html':
                index_file.write(f'<li><a href="{link}">{link}</a></li>\n')
        index_file.write('</ul>\n</body>\n</html>\n')
================================================
FILE: docs/source/index.rst
================================================
.. TransOPT documentation master file, created by
sphinx-quickstart on Mon Aug 19 16:00:09 2024.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
.. _home:
TransOPT: Transfer Optimization System for Bayesian Optimization Using Transfer Learning
========================================================================================
TransOPT is an open-source software platform designed to facilitate the design, benchmarking, and application of transfer learning for Bayesian optimization (TLBO) algorithms through a modular, data-centric framework.
.. raw:: html
:file: home/guide.html
Video Demonstration
********************************************************************************
Watch the following video for a quick overview of TransOPT's capabilities:
.. raw:: html
Features
********************************************************************************
TransOPT offers diverse features covering various aspects of transfer optimization.
.. raw:: html
:file: home/feature.html
Contents
********************************************************************************
.. toctree::
:maxdepth: 2
installation
quickstart
usage/algorithms
usage/problems
usage/results
usage/data_manage
usage/visualization
usage/cli
development/architecture
development/api_reference
faq
Contact
********************************************************************************
| **Peili Mao**
| *University of Electronic Science and Technology of China*
| *Department of Computer Science*
| **E-mail**:
| peili.z.mao@gmail.com
Cite
********************************************************************************
If you have utilized our framework for research purposes, we kindly invite you to cite our publication as follows:
BibTex:
.. code-block:: bibtex
@ARTICLE{TransOPT,
title = {{TransOPT}: Transfer Optimization System for Bayesian Optimization Using Transfer Learning},
author = {Author Name and Collaborator Name},
url = {https://github.com/maopl/TransOPT},
year = {2024}
}
================================================
FILE: docs/source/installation.rst
================================================
Installation Guide
==================
This section will guide you through the steps required to install TransOPT on your system.
Before installing, ensure you have the following installed:
- Python 3.10
- Node.js 17.9.1
- npm 8.11.0
1. Clone the repository:
.. code-block:: console
$ git clone https://github.com/maopl/TransOpt.git
2. Install the required dependencies:
.. code-block:: console
$ cd TransOpt
$ python setup.py install
3. Install the frontend dependencies:
.. code-block:: console
$ cd webui && npm install
4. (Optional) Install additional extensions:
You can enhance the functionality of the system by installing the following optional packages:
- **Extension 1**: Provides advanced results analysis.
.. code-block:: console
$ pip install ".[analysis]"
- **Extension 2**: Adds support for distributed computing.
.. code-block:: console
$ pip install ".[remote]"
5. (Optional) Install optional Docker containers:
The following Docker containers are available to provide additional problem generators:
- **Inverse RNA Design**: Provides inverse RNA design problem generators:
.. code-block:: console
$ bash scripts/init_docker.sh
- **Protein Design**: Provides protein design problem generators:
.. code-block:: console
$ bash scripts/init_csstuning.sh
- **Configurable Software Tuning**: Provides configurable software tuning problem generators:
.. code-block:: console
$ bash scripts/init_csstuning.sh
================================================
FILE: docs/source/quickstart.rst
================================================
Quick Start
======================
TransOPT is a system designed to facilitate transfer optimization services and experiments. It is composed of two parts: the agent, which runs the optimization algorithms, and the web user interface, which displays results and manages experiments.
Start the backend agent:
.. code-block:: console
$ python transopt/agent/app.py
Web User Interface Mode
-----------------------
Once TransOPT has started successfully, go to the webui directory and start the web UI on your local machine. Enable the user interface mode with the following command:
.. code-block:: console
$ cd webui && npm start
Command Line Mode
-----------------
In addition to the web UI mode, TransOPT also offers a Command Line (CMD) mode for users who may not have access to a display screen, such as when working on a remote server.
To run TransOPT in CMD mode, use the following command:
.. code-block:: console
$ python transopt/agent/run_cli.py -n MyTask -v 3 -o 2 -m RF -acf UCB -b 300
This command sets up a task named `MyTask` with 3 variables and 2 objectives, using a Random Forest model (`RF`) and the Upper Confidence Bound (`UCB`) acquisition function, with a budget of 300 function evaluations.
For a complete list of available options and more detailed usage instructions, please refer to the :ref:`CLI documentation `.
================================================
FILE: docs/source/usage/TOS.bib
================================================
%!BibTeX
@article{QureshiIGKWUHLYA23,
author = {Rizwan Qureshi and
Muhammad Irfan and
Taimoor Muzaffar Gondal and
Sheheryar Khan and
Jia Wu and
Muhammad Usman Hadi and
John Heymach and
Xiuning Le and
Hong Yan and
Tanvir Alam},
title = {AI in drug discovery and its clinical relevance},
journal = {Heliyon},
volume = {9},
number = {7},
pages = {e17575},
year = {2023}
}
@article{jomaasg21,
author = {Hadi S. Jomaa and
Lars Schmidt{-}Thieme and
Josif Grabocka},
title = {Dataset2Vec: Learning Dataset Meta-Features},
journal = {Data Min. Knowl. Discov.},
volume = {35},
number = {3},
pages = {964--985},
year = {2021},
timestamp = {Tue, 07 May 2024 20:27:49 +0200},
biburl = {https://dblp.org/rec/journals/datamine/JomaaSG21.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{MittasA13,
author = {Nikolaos Mittas and
Lefteris Angelis},
title = {Ranking and Clustering Software Cost Estimation Models through a Multiple Comparisons Algorithm},
journal = {{IEEE} Trans. Software Eng.},
volume = {39},
number = {4},
pages = {537--551},
year = {2013},
}
@inproceedings{MirandaYWK22,
author = {Brando Miranda and
Patrick Yu and
Yu-Xiong Wang and
Sanmi Koyejo},
title = {The Curse of Low Task Diversity: On the Failure of Transfer Learning to Outperform MAML and their Empirical Equivalence},
booktitle = {NeurIPS 2022 Workshop on MetaLearn},
pages = {770--778},
year = {2022}
}
@inproceedings{ZhangH24,
author = {Guanhua Zhang and
Moritz Hardt},
title = {Inherent Trade-Offs between Diversity and Stability in Multi-Task Benchmarks},
booktitle = {ICML'24: Proc. of the 41st International Conference on Machine Learning},
year = {2024},
note = {accepted for publication}
}
@inproceedings{TripuraneniJJ20,
author = {Nilesh Tripuraneni and
Michael I. Jordan and
Chi Jin},
title = {On the Theory of Transfer Learning: The Importance of Task Diversity},
booktitle = {NeurIPS'20: Proc. of the 33rd Annual Conference on Neural Information Processing Systems},
year = {2020},
timestamp = {Wed, 07 Dec 2022 22:58:55 +0100},
biburl = {https://dblp.org/rec/conf/nips/TripuraneniJJ20.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{FeurerSH14,
author = {Matthias Feurer and
Jost Tobias Springenberg and
Frank Hutter},
title = {Using Meta-Learning to Initialize {Bayesian} Optimization of Hyperparameters},
booktitle = {Proc. of the International Workshop on Meta-learning and Algorithm Selection colocated with 21st European Conference on Artificial Intelligence},
series = {{CEUR} Workshop Proceedings},
volume = {1201},
pages = {3--10},
publisher = {CEUR-WS.org},
year = {2014},
timestamp = {Fri, 10 Mar 2023 16:22:14 +0100},
biburl = {https://dblp.org/rec/conf/ecai/FeurerSH14.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{HeZRS16,
author = {Kaiming He and
Xiangyu Zhang and
Shaoqing Ren and
Jian Sun},
title = {Deep Residual Learning for Image Recognition},
booktitle = {CVPR'16: Proc. of 2016 {IEEE} Conference on Computer Vision and Pattern Recognition},
pages = {770--778},
publisher = {{IEEE} Computer Society},
year = {2016},
timestamp = {Fri, 24 Mar 2023 00:02:57 +0100},
biburl = {https://dblp.org/rec/conf/cvpr/HeZRS16.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{BrownMRSKDNSSAA20,
author = {Tom B. Brown and
Benjamin Mann and
Nick Ryder and
Melanie Subbiah and
Jared Kaplan and
Prafulla Dhariwal and
Arvind Neelakantan and
Pranav Shyam and
Girish Sastry and
Amanda Askell and
Sandhini Agarwal and
Ariel Herbert{-}Voss and
Gretchen Krueger and
Tom Henighan and
Rewon Child and
Aditya Ramesh and
Daniel M. Ziegler and
Jeffrey Wu and
Clemens Winter and
Christopher Hesse and
Mark Chen and
Eric Sigler and
Mateusz Litwin and
Scott Gray and
Benjamin Chess and
Jack Clark and
Christopher Berner and
Sam McCandlish and
Alec Radford and
Ilya Sutskever and
Dario Amodei},
title = {Language Models are Few-Shot Learners},
booktitle = {NeurIPS'20: Proc. of the 33rd Annual Conference on Neural Information Processing Systems},
year = {2020},
timestamp = {Thu, 25 May 2023 10:38:31 +0200},
biburl = {https://dblp.org/rec/conf/nips/BrownMRSKDNSSAA20.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{ShahriariSWAF16,
author = {Bobak Shahriari and
Kevin Swersky and
Ziyu Wang and
Ryan P. Adams and
Nando de Freitas},
title = {Taking the Human Out of the Loop: {A} Review of {Bayesian} Optimization},
journal = {Proc. {IEEE}},
volume = {104},
number = {1},
pages = {148--175},
year = {2016},
timestamp = {Fri, 02 Oct 2020 14:42:23 +0200},
biburl = {https://dblp.org/rec/journals/pieee/ShahriariSWAF16.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{Frazier18,
author = {Peter I. Frazier},
title = {A Tutorial on {Bayesian} Optimization},
journal = {CoRR},
volume = {abs/1807.02811},
year = {2018},
url = {http://arxiv.org/abs/1807.02811},
eprinttype = {arXiv},
eprint = {1807.02811},
timestamp = {Mon, 13 Aug 2018 16:48:03 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-1807-02811.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{vazquez24,
title={De novo design of high-affinity binders of bioactive helical peptides},
author={V{\'a}zquez Torres, Susana and Leung, Philip JY and Venkatesh, Preetham and Lutz, Isaac D and Hink, Fabian and Huynh, Huu-Hien and Becker, Jessica and Yeh, Andy Hsien-Wei and Juergens, David and Bennett, Nathaniel R and others},
journal={Nature},
volume={626},
number={7998},
pages={435--442},
year={2024},
publisher={Nature Publishing Group UK London}
}
@inproceedings{SnoekLA12,
author = {Jasper Snoek and
Hugo Larochelle and
Ryan P. Adams},
title = {Practical {Bayesian} Optimization of Machine Learning Algorithms},
booktitle = {NIPS'12: Proc. of the 26th Annual Conference on Neural Information Processing Systems},
pages = {2960--2968},
year = {2012},
timestamp = {Mon, 16 May 2022 15:41:51 +0200},
biburl = {https://dblp.org/rec/conf/nips/SnoekLA12.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{LiXKZ20,
author = {Shibo Li and
Wei W. Xing and
Robert M. Kirby and
Shandian Zhe},
title = {Multi-Fidelity Bayesian Optimization via Deep Neural Networks},
booktitle = {NeurIPS'20: Proc. of the 33rd Annual Conference on Neural Information Processing Systems},
year = {2020},
timestamp = {Sun, 19 Mar 2023 20:50:17 +0100},
biburl = {https://dblp.org/rec/conf/nips/LiXKZ20.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{hansen2010,
title={Comparing results of 31 algorithms from the black-box optimization benchmarking BBOB-2009},
author={Hansen, Nikolaus and Auger, Anne and Ros, Raymond and Finck, Steffen and Po{\v{s}}{\'\i}k, Petr},
booktitle={Proceedings of the 12th annual conference companion on Genetic and evolutionary computation},
pages={1689--1696},
year={2010}
}
@inproceedings{SegreraPM08,
author = {Saddys Segrera and
Joel Pinho Lucas and
Mar{\'{\i}}a N. Moreno Garc{\'{\i}}a},
title = {Information-Theoretic Measures for Meta-learning},
booktitle = {HAIS'08: Proc. of the 2008 Hybrid Artificial Intelligence Systems, Third International Workshop},
series = {Lecture Notes in Computer Science},
volume = {5271},
pages = {458--465},
publisher = {Springer},
year = {2008},
timestamp = {Tue, 14 May 2019 10:00:51 +0200},
biburl = {https://dblp.org/rec/conf/hais/SegreraPM08.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{Ho95,
author = {Tin Kam Ho},
title = {Random decision forests},
booktitle = {ICDAR'95: Proc. of the Third International Conference on Document Analysis and Recognition},
pages = {278--282},
publisher = {{IEEE} Computer Society},
year = {1995},
timestamp = {Fri, 24 Mar 2023 00:05:08 +0100},
biburl = {https://dblp.org/rec/conf/icdar/Ho95.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{HuangLMW17,
author = {Gao Huang and
Zhuang Liu and
Laurens van der Maaten and
Kilian Q. Weinberger},
title = {Densely Connected Convolutional Networks},
booktitle = {CVPR'17: Proc. of the 2017 {IEEE} Conference on Computer Vision and Pattern Recognition},
pages = {2261--2269},
publisher = {{IEEE} Computer Society},
year = {2017},
timestamp = {Mon, 28 Aug 2023 21:17:39 +0200},
biburl = {https://dblp.org/rec/conf/cvpr/HuangLMW17.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{FeurerKESBH15,
author = {Matthias Feurer and
Aaron Klein and
Katharina Eggensperger and
Jost Tobias Springenberg and
Manuel Blum and
Frank Hutter},
title = {Efficient and Robust Automated Machine Learning},
booktitle = {NIPS'15: Proc. of the 28th Annual Conference on Neural Information Processing Systems},
pages = {2962--2970},
year = {2015},
timestamp = {Mon, 16 May 2022 15:41:51 +0200},
biburl = {https://dblp.org/rec/conf/nips/FeurerKESBH15.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{Cowen-RiversLTW22,
author = {Alexander I. Cowen{-}Rivers and
Wenlong Lyu and
Rasul Tutunov and
Zhi Wang and
Antoine Grosnit and
Ryan{-}Rhys Griffiths and
Alexandre Max Maraval and
Jianye Hao and
Jun Wang and
Jan Peters and
Haitham Bou{-}Ammar},
title = {{HEBO:} {An} Empirical Study of Assumptions in {Bayesian} Optimisation},
journal = {J. Artif. Intell. Res.},
volume = {74},
pages = {1269--1349},
year = {2022},
timestamp = {Mon, 28 Aug 2023 21:18:41 +0200},
biburl = {https://dblp.org/rec/journals/jair/Cowen-RiversLTW22.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{ZhuangQDXZZXH21,
author = {Fuzhen Zhuang and
Zhiyuan Qi and
Keyu Duan and
Dongbo Xi and
Yongchun Zhu and
Hengshu Zhu and
Hui Xiong and
Qing He},
title = {A Comprehensive Survey on Transfer Learning},
journal = {Proc. {IEEE}},
volume = {109},
number = {1},
pages = {43--76},
year = {2021},
timestamp = {Mon, 26 Jun 2023 20:52:19 +0200},
biburl = {https://dblp.org/rec/journals/pieee/ZhuangQDXZZXH21.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{BaiLSZZC23,
author = {Tianyi Bai and
Yang Li and
Yu Shen and
Xinyi Zhang and
Wentao Zhang and
Bin Cui},
title = {Transfer Learning for {Bayesian} Optimization: {A} Survey},
journal = {CoRR},
volume = {abs/2302.05927},
year = {2023},
url = {https://doi.org/10.48550/arXiv.2302.05927},
doi = {10.48550/ARXIV.2302.05927},
eprinttype = {arXiv},
eprint = {2302.05927},
timestamp = {Wed, 01 Mar 2023 21:16:31 +0100},
biburl = {https://dblp.org/rec/journals/corr/abs-2302-05927.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{CortesV95,
author = {Corinna Cortes and
Vladimir Vapnik},
title = {Support-Vector Networks},
journal = {Mach. Learn.},
volume = {20},
number = {3},
pages = {273--297},
year = {1995},
timestamp = {Mon, 02 Mar 2020 16:28:45 +0100},
biburl = {https://dblp.org/rec/journals/ml/CortesV95.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{KrizhevskySH12,
author = {Alex Krizhevsky and
Ilya Sutskever and
Geoffrey E. Hinton},
title = {ImageNet Classification with Deep Convolutional Neural Networks},
booktitle = {NIPS'12: Proc. of the 26th Annual Conference on Neural Information Processing Systems},
pages = {1106--1114},
year = {2012},
timestamp = {Mon, 16 May 2022 15:41:51 +0200},
biburl = {https://dblp.org/rec/conf/nips/KrizhevskySH12.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{ChenG16,
author = {Tianqi Chen and
Carlos Guestrin},
title = {XGBoost: {A} Scalable Tree Boosting System},
booktitle = {KDD'16: Proc. of the 22nd {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining},
pages = {785--794},
publisher = {{ACM}},
year = {2016},
}
@article{VanschorenRBT13,
author = {Joaquin Vanschoren and
Jan N. van Rijn and
Bernd Bischl and
Lu{\'{\i}}s Torgo},
title = {OpenML: networked science in machine learning},
journal = {{SIGKDD} Explor.},
volume = {15},
number = {2},
pages = {49--60},
year = {2013},
}
@article{BombarelliWDHSSAHAA18,
author = {Rafael G\'omez-Bombarelli and
Jennifer N. Wei and
David Duvenaud and
Jos\'e Miguel Hern\'andez-Lobato and
Benjam\'in S\'anchez-Lengeling and
Dennis Sheberla and
Jorge Aguilera-Iparraguirre and
Timothy D. Hirzel and
Ryan P. Adams and
Al\'an Aspuru-Guzik},
title = {Automatic Chemical Design Using a Data-Driven Continuous Representation of Molecules},
journal = {ACS Cent. Sci.},
volume = {4},
number = {2},
pages = {268--276},
year = {2018}
}
@inproceedings{KorovinaXKNPSX20,
author = {Ksenia Korovina and
Sailun Xu and
Kirthevasan Kandasamy and
Willie Neiswanger and
Barnab{\'{a}}s P{\'{o}}czos and
Jeff Schneider and
Eric P. Xing},
title = {{ChemBO}: {Bayesian} Optimization of Small Organic Molecules with Synthesizable Recommendations},
booktitle = {AISTATS'20: Proc. of the 23rd International Conference on Artificial Intelligence and Statistics},
series = {Proceedings of Machine Learning Research},
volume = {108},
pages = {3393--3403},
publisher = {{PMLR}},
year = {2020},
}
@article{HaseRKA18,
author = {Florian H\"ase and
Lo\"ic M Roch and
Christoph Kreisbeck and
Al\'an Aspuru-Guzik},
title = {Phoenics: {A} {Bayesian} Optimizer for Chemistry},
journal = {ACS Cent. Sci.},
volume = {4},
number = {9},
pages = {1134--1145},
year = {2018}
}
@article{WangHXLHLXT23,
author = {Xiaoqian Wang and
Yang Huang and
Xiaoyu Xie and
Yan Liu and
Ziyu Huo and
Maverick Lin and
Hongliang Xin and
Rong Tong},
title = {Bayesian-optimization-assisted discovery of stereoselective aluminum complexes for ring-opening polymerization of racemic lactide},
journal = {Nat. Commun.},
volume = {14},
number = {3647},
year = {2023},
pages = {1--11}
}
@article{RaoG24,
author = {Anish Rao and
Marek Grzelczak},
title = {Revisiting {El-Sayed} Synthesis: {Bayesian} Optimization for Revealing New Insights during the Growth of Gold Nanorods},
journal = {Chem. Mater.},
volume = {36},
number = {5},
year = {2024},
pages = {2577--2587}
}
@article{ShieldsSLPDAJAD21,
author = {Benjamin J. Shields and
Jason Stevens and
Jun Li and
Marvin Parasram and
Farhan Damani and
Jesus I. Martinez Alvarado and
Jacob M. Janey and
Ryan P. Adams and
Abigail G. Doyle},
title = {Bayesian reaction optimization as a tool for chemical synthesis},
journal = {Nature},
volume = {590},
year = {2021},
pages = {89--96}
}
@inbook{Frazier2016,
author = {Peter I. Frazier and
Jialei Wang},
editor = {Turab Lookman and
Francis J. Alexander and
Krishna Rajan},
title = {Bayesian Optimization for Materials Design},
booktitle = {Information Science for Materials Discovery and Design},
year = {2016},
publisher = {Springer International Publishing},
address = {Cham},
pages = {45--75},
isbn = {978-3-319-23871-5}
}
@book{Packwood17,
author = {Daniel Packwood},
title = {Bayesian Optimization for Materials Science},
publisher = {Springer Singapore},
year = {2017},
month = {October}
}
@article{ZhangAC20,
author = {Yichi Zhang and
Daniel W. Apley and
Wei Chen},
title = {Bayesian Optimization for Materials Design with Mixed Quantitative and Qualitative Variables},
journal = {Sci. Rep.},
volume = {10},
number = {1},
year = {2020}
}
@article{Barnes11,
author = {Chris P. Barnes and
Daniel Silk and
Xia Sheng and
Michael P. H. Stumpf},
title = {Bayesian design of synthetic biological systems},
journal = {PNAS},
volume = {108},
number = {37},
year = {2011},
pages = {15190--15195}
}
@article{AraujoVS22,
author = {Robyn P. Araujo and
Sean T. Vittadello and
Michael P. H. Stumpf},
title = {Bayesian and Algebraic Strategies to Design in Synthetic Biology},
journal = {Proc. {IEEE}},
volume = {110},
number = {5},
pages = {675--687},
year = {2022},
}
@article{MerzbacherAO23,
author = {Charlotte Merzbacher and
Oisin Mac Aodha and
Diego A. Oyarz\'un},
title = {Bayesian Optimization for Design of Multiscale Biological Circuits},
journal = {ACS Synth. Biol.},
volume = {12},
number = {7},
year = {2023},
pages = {2073--2082}
}
@article{HuangQZTLC23,
author = {Shiyue Huang and
Yanzhao Qin and
Xinyi Zhang and
Yaofeng Tu and
Zhongliang Li and
Bin Cui},
title = {Survey on performance optimization for database systems},
journal = {Sci. China Inf. Sci.},
volume = {66},
number = {2},
year = {2023},
}
@inproceedings{HellstenSLLHEKS23,
author = {Erik Orm Hellsten and
Artur L. F. Souza and
Johannes Lenfers and
Rubens Lacouture and
Olivia Hsu and
Adel Ejjeh and
Fredrik Kjolstad and
Michel Steuwer and
Kunle Olukotun and
Luigi Nardi},
title = {{BaCO}: {A} Fast and Portable {Bayesian} Compiler Optimization Framework},
booktitle = {ASPLOS'23: Proc. of the 28th {ACM} International Conference on Architectural Support for Programming Languages and Operating Systems},
pages = {19--42},
publisher = {{ACM}},
year = {2023},
}
@article{BanchhorS21,
author = {Chitrakant Banchhor and
N. Srinivasu},
title = {Analysis of Bayesian optimization algorithms for big data classification
based on Map Reduce framework},
journal = {J. Big Data},
volume = {8},
number = {1},
pages = {81},
year = {2021},
}
@inproceedings{ShiltonGRV17,
author = {Alistair Shilton and
Sunil Gupta and
Santu Rana and
Svetha Venkatesh},
title = {Regret Bounds for Transfer Learning in {Bayesian} Optimisation},
booktitle = {AISTATS'17: Proc. of the 2017 International Conference on Artificial Intelligence and Statistics},
volume = {54},
pages = {307--315},
publisher = {{PMLR}},
year = {2017},
}
@inproceedings{RamachandranGRV18,
author = {Anil Ramachandran and
Sunil Gupta and
Santu Rana and
Svetha Venkatesh},
title = {Selecting Optimal Source for Transfer Learning in {Bayesian} Optimisation},
booktitle = {PRICAI'18: Proc. of the 2018 Trends in Artificial Intelligence - 15th Pacific Rim International Conference on Artificial Intelligence},
series = {Lecture Notes in Computer Science},
volume = {11012},
pages = {42--56},
publisher = {Springer},
year = {2018},
}
@inproceedings{TighineanuSBRBV22,
author = {Petru Tighineanu and
Kathrin Skubch and
Paul Baireuther and
Attila Reiss and
Felix Berkenkamp and
Julia Vinogradska},
title = {Transfer Learning with {Gaussian} Processes for {Bayesian} Optimization},
booktitle = {AISTATS'22: Proc. of the 2022 International Conference on Artificial Intelligence and Statistics},
volume = {151},
pages = {6152--6181},
publisher = {{PMLR}},
year = {2022},
}
@inproceedings{SwerskySA13,
author = {Kevin Swersky and
Jasper Snoek and
Ryan Prescott Adams},
title = {Multi-Task {Bayesian} Optimization},
booktitle = {NIPS'13: Proc. of the 2013 Annual Conference on Neural Information Processing Systems},
pages = {2004--2012},
year = {2013},
}
@inproceedings{MossLR20,
author = {Henry B. Moss and
David S. Leslie and
Paul Rayson},
title = {{MUMBO:} MUlti-task Max-Value {Bayesian} Optimization},
booktitle = {ECML/PKDD'20: Proc. of the 2020 European Conference on Machine Learning and Knowledge Discovery in Databases},
series = {Lecture Notes in Computer Science},
volume = {12459},
pages = {447--462},
publisher = {Springer},
year = {2020},
}
@article{TaylorFWJGCJL23,
author = {Connor J. Taylor and
Kobi C. Felton and
Daniel Wigh and
Mohammed I. Jeraal and
Rachel Grainger and
Gianni Chessari and
Christopher N. Johnson and
Alexei A. Lapkin},
title = {Accelerated Chemical Reaction Optimization Using Multi-Task Learning},
journal = {ACS Cent. Sci.},
volume = {9},
number = {5},
year = {2023}
}
@inproceedings{VolppFFDFHD20,
author = {Michael Volpp and
Lukas P. Fr{\"{o}}hlich and
Kirsten Fischer and
Andreas Doerr and
Stefan Falkner and
Frank Hutter and
Christian Daniel},
title = {Meta-Learning Acquisition Functions for Transfer Learning in Bayesian
Optimization},
booktitle = {ICLR'20: Proc. of the 8th International Conference on Learning Representations},
publisher = {OpenReview.net},
year = {2020},
}
@article{ZimmerLH21,
author = {Lucas Zimmer and
Marius Lindauer and
Frank Hutter},
title = {{Auto-PyTorch}: Multi-Fidelity MetaLearning for Efficient and Robust {AutoDL}},
journal = {{IEEE} Trans. Pattern Anal. Mach. Intell.},
volume = {43},
number = {9},
pages = {3079--3090},
year = {2021},
}
@article{FeurerEFLH22,
author = {Matthias Feurer and
Katharina Eggensperger and
Stefan Falkner and
Marius Lindauer and
Frank Hutter},
title = {Auto-Sklearn 2.0: Hands-free {AutoML} via Meta-Learning},
journal = {J. Mach. Learn. Res.},
volume = {23},
pages = {261:1--261:61},
year = {2022},
}
@inproceedings{FeurerSH15,
author = {Matthias Feurer and
Jost Tobias Springenberg and
Frank Hutter},
title = {Initializing Bayesian Hyperparameter Optimization via Meta-Learning},
booktitle = {AAAI'15: Proc. of the 2015 {AAAI} Conference on Artificial Intelligence},
pages = {1128--1135},
publisher = {{AAAI} Press},
year = {2015},
}
@inproceedings{WistubaSS15a,
author = {Martin Wistuba and
Nicolas Schilling and
Lars Schmidt{-}Thieme},
title = {Learning hyperparameter optimization initializations},
booktitle = {DSAA'15: Proc. of the 2015 {IEEE} International Conference on Data Science and Advanced
Analytics},
pages = {1--10},
publisher = {{IEEE}},
year = {2015},
}
@inproceedings{MaravelZGA23,
author = {Alexandre Maraval and
Matthieu Zimmer and
Antoine Grosnit and
Haitham Bou Ammar},
booktitle = {NeurIPS'23: Proc. of the 36th Annual Conference on Neural Information Processing Systems},
title = {End-to-End Meta-{Bayesian} Optimisation with Transformer Neural Processes},
volume = {36},
pages = {11246--11260},
year = {2023}
}
@inproceedings{HsiehHL21,
author = {Bing{-}Jing Hsieh and
Ping{-}Chun Hsieh and
Xi Liu},
title = {Reinforced Few-Shot Acquisition Function Learning for {Bayesian} Optimization},
booktitle = {NeurIPS'21: Proc. of the 34th Annual Conference on Neural Information Processing Systems},
pages = {7718--7731},
year = {2021},
}
@inproceedings{WistubaG21,
author = {Martin Wistuba and
Josif Grabocka},
title = {Few-Shot Bayesian Optimization with Deep Kernel Surrogates},
booktitle = {ICLR'21: Proc. of the 9th International Conference on Learning Representations},
publisher = {OpenReview.net},
year = {2021},
}
@inproceedings{MallikBHSJLNH23,
author = {Neeratyoy Mallik and
Edward Bergman and
Carl Hvarfner and
Danny Stoll and
Maciej Janowski and
Marius Lindauer and
Luigi Nardi and
Frank Hutter},
title = {{PriorBand}: Practical Hyperparameter Optimization in the Age of Deep Learning},
booktitle = {NeurIPS'23: Proc. of the 36th Annual Conference on Neural Information Processing Systems},
pages = {7377--7391},
volume = {36},
year = {2023}
}
@inproceedings{BalandatKJDLWB20,
author = {Maximilian Balandat and
Brian Karrer and
Daniel R. Jiang and
Samuel Daulton and
Benjamin Letham and
Andrew Gordon Wilson and
Eytan Bakshy},
title = {{BoTorch}: {A} Framework for Efficient Monte-Carlo {Bayesian} Optimization},
booktitle = {NeurIPS'20: Proc. of the 33rd Annual Conference on Neural Information Processing Systems},
year = {2020},
}
@inproceedings{BergstraYC13,
author = {James Bergstra and
Dan Yamins and
David D. Cox},
title = {{HyperOpt}: {A} {Python} Library for Optimizing the Hyperparameters of Machine Learning Algorithms},
booktitle = {SciPy'13: Proc. of the 2013 Python in Science Conference},
pages = {13--19},
publisher = {scipy.org},
year = {2013},
}
@inproceedings{AkibaSYOK19,
author = {Takuya Akiba and
Shotaro Sano and
Toshihiko Yanase and
Takeru Ohta and
Masanori Koyama},
title = {Optuna: {A} Next-generation Hyperparameter Optimization Framework},
booktitle = {KDD'19: Proc. of the 2019 {ACM} {SIGKDD} International Conference on Knowledge Discovery {\&} Data Mining},
pages = {2623--2631},
publisher = {{ACM}},
year = {2019},
}
@inproceedings{NardiSKO19,
author = {Luigi Nardi and
Artur L. F. Souza and
David Koeplinger and
Kunle Olukotun},
title = {{HyperMapper}: a Practical Design Space Exploration Framework},
booktitle = {MASCOTS'19: Proc. of the 2019 {IEEE} International Symposium on Modeling, Analysis, and Simulation of Computer and Telecommunication Systems},
pages = {425--426},
publisher = {{IEEE} Computer Society},
year = {2019},
}
@inproceedings{RijnBTGUFWWBV13,
author = {Jan N. van Rijn and
Bernd Bischl and
Lu{\'{\i}}s Torgo and
Bo Gao and
Venkatesh Umaashankar and
Simon Fischer and
Patrick Winter and
Bernd Wiswedel and
Michael R. Berthold and
Joaquin Vanschoren},
title = {{OpenML}: {A} Collaborative Science Platform},
booktitle = {PKDD'13: Proc. of the 2013 Machine Learning and Knowledge Discovery in Databases - European Conference},
series = {Lecture Notes in Computer Science},
volume = {8190},
pages = {645--649},
publisher = {Springer},
year = {2013},
}
@inproceedings{FalknerKH18,
author = {Stefan Falkner and
Aaron Klein and
Frank Hutter},
title = {{BOHB:} Robust and Efficient Hyperparameter Optimization at Scale},
booktitle = {ICML'18: Proceedings of the 35th International Conference on Machine Learning},
series = {Proceedings of Machine Learning Research},
volume = {80},
pages = {1436--1445},
publisher = {{PMLR}},
year = {2018},
}
@techreport{Krizhevsky09,
title = {Learning Multiple Layers of Features from Tiny Images},
author = {Alex Krizhevsky},
institution = {University of Toronto},
year = {2009}
}
@article{Deng12,
title={The MNIST Database of Handwritten Digit Images for Machine Learning Research [Best of the Web]},
author={Li Deng},
journal={IEEE Signal Processing Magazine},
year={2012},
volume={29},
pages={141--142},
}
@article{Runge24,
title={RnaBench: A Comprehensive Library for In Silico RNA Modelling},
author={Frederic Runge and Karim Farid and J{\"{o}}rg K. H. Franke and Frank Hutter},
journal={bioRxiv},
year={2024},
}
@article{Kalvari20,
title={Rfam 14: expanded coverage of metagenomic, viral and microRNA families},
author={Ioanna Kalvari and Eric P. Nawrocki and Nancy Ontiveros-Palacios and Joanna Argasinska and Kevin Lamkiewicz and Manja Marz and Sam Griffiths-Jones and Claire Toffano-Nioche and Daniel Gautheret and Zasha Weinberg and Elena Rivas and Sean R. Eddy and Robert D. Finn and Alex Bateman and Anton I. Petrov},
journal={Nucleic Acids Research},
year={2020},
volume={49},
pages={D192--D200},
}
@techreport{Suganthan05,
title={Problem Definitions and Evaluation Criteria for the {CEC} 2005 Special Session on Real-Parameter Optimization},
author={Ponnuthurai Nagaratnam Suganthan and Nikolaus Hansen and Jing J. Liang and Kalyanmoy Deb and Ying-Ping Chen and Anne Auger and Santosh Tiwari},
institution={Nanyang Technological University},
year={2005}
}
@inproceedings{Netzer11,
title={Reading Digits in Natural Images with Unsupervised Feature Learning},
author={Yuval Netzer and Tao Wang and Adam Coates and Alessandro Bissacco and Bo Wu and Andrew Y. Ng},
booktitle={NIPS Workshop on Deep Learning and Unsupervised Feature Learning},
year={2011}
}
@article{kushner1964,
title={A new method of locating the maximum point of an arbitrary multipeak curve in the presence of noise},
author={Kushner, Harold J.},
journal={Journal of Basic Engineering},
volume={86},
number={1},
pages={97--106},
year={1964}
}
@article{HansenARMTB21,
author = {Nikolaus Hansen and
Anne Auger and
Raymond Ros and
Olaf Mersmann and
Tea Tusar and
Dimo Brockhoff},
title = {{COCO:} a platform for comparing continuous optimizers in a black-box
setting},
journal = {Optim. Methods Softw.},
volume = {36},
number = {1},
pages = {114--144},
year = {2021},
}
@article{LindauerEFBDBRS22,
author = {Marius Lindauer and
Katharina Eggensperger and
Matthias Feurer and
Andr{\'{e}} Biedenkapp and
Difan Deng and
Carolin Benjamins and
Tim Ruhkopf and
Ren{\'{e}} Sass and
Frank Hutter},
title = {{SMAC3:} {A} Versatile Bayesian Optimization Package for Hyperparameter
Optimization},
journal = {J. Mach. Learn. Res.},
volume = {23},
pages = {54:1--54:9},
year = {2022},
}
@inproceedings{LiSZCJLJG0Y0021,
author = {Yang Li and
Yu Shen and
Wentao Zhang and
Yuanwei Chen and
Huaijun Jiang and
Mingchao Liu and
Jiawei Jiang and
Jinyang Gao and
Wentao Wu and
Zhi Yang and
Ce Zhang and
Bin Cui},
title = {OpenBox: {A} Generalized Black-box Optimization Service},
booktitle = {KDD'21: Proc. of the 27th {ACM} {SIGKDD} Conference on Knowledge Discovery and Data Mining},
pages = {3209--3219},
publisher = {{ACM}},
year = {2021},
}
@inproceedings{GolovinSMKKS17,
author = {Daniel Golovin and
Benjamin Solnik and
Subhodeep Moitra and
Greg Kochanski and
John Karro and
D. Sculley},
title = {Google Vizier: {A} Service for Black-Box Optimization},
booktitle = {KDD'17: Proc. of the 23rd {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining},
pages = {1487--1495},
publisher = {{ACM}},
year = {2017},
}
@misc{OpenAI2022ChatGPT,
author = {OpenAI},
title = {Introducing ChatGPT},
year = {2022},
howpublished = {\url{https://openai.com/blog/chatgpt/}}
}
@article{KudithipudiAB22,
author = {Dhireesha Kudithipudi and
Mario Aguilar-Simon and
Jonathan Babb and
others},
title = {Biological underpinnings for lifelong learning machines},
journal = {Nat. Mach. Intell.},
year = {2022},
volume = {4},
pages = {196--210}
}
@article{LiCY23,
author = {Ke Li and
Renzhi Chen and
Xin Yao},
title = {A Data-Driven Evolutionary Transfer Optimization for Expensive Problems in Dynamic Environments},
journal = {IEEE Trans. Evol. Comput.},
year = {2023},
note = {in press}
}
@book{Hazan22,
author = {Elad Hazan},
title = {Introduction to Online Convex Optimization},
publisher = {The MIT Press},
edition = {Second},
series = {Adaptive Computation and Machine Learning series},
year = {2022},
month = {September},
isbn = {9780262046985}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% KL references %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@article{RiversLLT2022,
author = {Cowen-Rivers, Alexander and Lyu, Wenlong and Tutunov, Rasul and Wang, Zhi and Grosnit, Antoine and Griffiths, Ryan-Rhys and Maraval, Alexandre and Hao, Jianye and Wang, Jun and Peters, Jan and Bou Ammar, Haitham},
year = {2022},
month = {07},
title = {HEBO: Pushing The Limits of Sample-Efficient Hyperparameter Optimisation},
volume = {74},
journal = {J. Artif. Intell. Res.}
}
@article{LinYLWW22,
author = {Kevin Lin and
Zhengyuan Yang and
Linjie Li and
Jianfeng Wang and
Lijuan Wang},
title = {{DEsignBench}: Exploring and Benchmarking {DALL-E} 3 for Imagining Visual Design},
journal = {CoRR},
volume = {abs/2310.15144},
year = {2023},
eprinttype = {arXiv},
eprint = {2310.15144},
}
@inproceedings{EsterKSX96,
author = {Martin Ester and
Hans{-}Peter Kriegel and
J{\"{o}}rg Sander and
Xiaowei Xu},
editor = {Evangelos Simoudis and
Jiawei Han and
Usama M. Fayyad},
title = {A Density-Based Algorithm for Discovering Clusters in Large Spatial
Databases with Noise},
booktitle = {KDD'96: Proc. of the Second International Conference on Knowledge Discovery and Data Mining},
pages = {226--231},
publisher = {{AAAI} Press},
year = {1996}
}
@article{MossLGR21,
author = {Henry B. Moss and
David S. Leslie and
Javier Gonzalez and
Paul Rayson},
title = {{GIBBON:} General-purpose Information-Based {Bayesian} Optimisation},
journal = {J. Mach. Learn. Res.},
volume = {22},
pages = {235:1--235:49},
year = {2021},
}
@article{WistubaSS18,
author = {Martin Wistuba and
Nicolas Schilling and
Lars Schmidt{-}Thieme},
title = {Scalable {Gaussian} process-based transfer surrogates for hyperparameter optimization},
journal = {Mach. Learn.},
volume = {107},
number = {1},
pages = {43--78},
year = {2018},
}
@article{Wang2021,
title = {Pre-trained {Gaussian} processes for {Bayesian} optimization},
author = {Wang, Zi and Dahl, George E and Swersky, Kevin and Lee, Chansoo and Mariet, Zelda and Nado, Zachary and Gilmer, Justin and Snoek, Jasper and Ghahramani, Zoubin},
journal = {arXiv preprint arXiv:2109.08215},
year = {2021}
}
@article{LiSJZLLZC22,
author = {Yang Li and
Yu Shen and
Huaijun Jiang and
Wentao Zhang and
Jixiang Li and
Ji Liu and
Ce Zhang and
Bin Cui},
title = {Hyper-Tune: Towards Efficient Hyper-parameter Tuning at Scale},
journal = {Proc. {VLDB} Endow.},
volume = {15},
number = {6},
pages = {1256--1265},
year = {2022},
}
@article{isaacs06,
title={RNA synthetic biology},
author={Isaacs, Farren J and Dwyer, Daniel J and Collins, James J},
journal={Nat. Biotechnol.},
volume={24},
number={5},
pages={545--554},
year={2006},
}
@inproceedings{pineda2021hpob,
author = {Sebastian Pineda{-}Arango and
Hadi S. Jomaa and
Martin Wistuba and
Josif Grabocka},
title = {{HPO-B:} {A} Large-Scale Reproducible Benchmark for Black-Box {HPO} based on OpenML},
booktitle = {NeurIPS'21: Proc. of the 2021 Neural Information Processing Systems Track on Datasets and Benchmarks},
year = {2021}
}
@article{ZhangCLWTLC22,
author = {Xinyi Zhang and
Zhuo Chang and
Yang Li and
Hong Wu and
Jian Tan and
Feifei Li and
Bin Cui},
title = {Facilitating Database Tuning with Hyper-Parameter Optimization: {A}
Comprehensive Experimental Evaluation},
journal = {Proc. {VLDB} Endow.},
volume = {15},
number = {9},
pages = {1808--1821},
year = {2022},
}
@article{KuhnPTB18,
author = {Daniel K{\"{u}}hn and
Philipp Probst and
Janek Thomas and
Bernd Bischl},
title = {Automatic Exploration of Machine Learning Experiments on OpenML},
journal = {CoRR},
volume = {abs/1806.10961},
year = {2018},
eprinttype = {arXiv},
eprint = {1806.10961},
}
@article{JiangSLZZ,
author = {Huaijun Jiang and
Yu Shen and
Yang Li and
Wentao Zhang and
Ce Zhang and
Bin Cui},
title = {OpenBox: {A} Python Toolkit for Generalized Black-box Optimization},
journal = {CoRR},
volume = {abs/2304.13339},
year = {2023},
eprinttype = {arXiv},
eprint = {2304.13339},
}
@article{HEBO22,
author = {Alexander I. Cowen{-}Rivers and
Wenlong Lyu and
Zhi Wang and
Rasul Tutunov and
Jianye Hao and
Jun Wang and
Haitham Bou{-}Ammar},
title = {{HEBO:} Heteroscedastic Evolutionary {Bayesian} Optimisation},
journal = {CoRR},
volume = {abs/2012.03826},
year = {2020},
eprinttype = {arXiv},
eprint = {2012.03826},
}
@article{Pyzer-Knapp18,
author = {Edward O. Pyzer{-}Knapp},
title = {{Bayesian} optimization for accelerated drug discovery},
journal = {{IBM} J. Res. Dev.},
volume = {62},
number = {6},
pages = {2:1--2:7},
year = {2018},
}
@article{MinGO21,
author = {Alan Tan Wei Min and
Abhishek Gupta and
Yew{-}Soon Ong},
title = {Generalizing Transfer {Bayesian} Optimization to Source-Target Heterogeneity},
journal = {{IEEE} Trans Autom. Sci. Eng.},
volume = {18},
number = {4},
pages = {1754--1765},
year = {2021},
}
@article{JiangHQHY18,
author = {Min Jiang and
Zhongqiang Huang and
Liming Qiu and
Wenzhen Huang and
Gary G. Yen},
title = {Transfer Learning-Based Dynamic Multiobjective Optimization Algorithms},
journal = {{IEEE} Trans. Evol. Comput.},
volume = {22},
number = {4},
pages = {501--514},
year = {2018},
}
@article{JiangWQGGT21,
author = {Min Jiang and
Zhenzhong Wang and
Liming Qiu and
Shihui Guo and
Xing Gao and
Kay Chen Tan},
title = {A Fast Dynamic Evolutionary Multiobjective Algorithm via Manifold
Transfer Learning},
journal = {{IEEE} Trans. Cybern.},
volume = {51},
number = {7},
pages = {3417--3428},
year = {2021},
}
@article{QiaoYQ0SYLT23,
author = {Kangjia Qiao and
Kunjie Yu and
Boyang Qu and
Jing Liang and
Hui Song and
Caitong Yue and
Hongyu Lin and
Kay Chen Tan},
title = {Dynamic Auxiliary Task-Based Evolutionary Multitasking for Constrained
Multiobjective Optimization},
journal = {{IEEE} Trans. Evol. Comput.},
volume = {27},
number = {3},
pages = {642--656},
year = {2023},
}
@article{LiuW19,
author = {Zhi{-}Zhong Liu and
Yong Wang},
title = {Handling Constrained Multiobjective Optimization Problems With Constraints
in Both the Decision and Objective Spaces},
journal = {{IEEE} Trans. Evol. Comput.},
volume = {23},
number = {5},
pages = {870--884},
year = {2019},
}
@article{ZhaoYSM22,
author = {Qi Zhao and
Bai Yan and
Yuhui Shi and
Martin Middendorf},
title = {Evolutionary Dynamic Multiobjective Optimization via Learning From
Historical Search Process},
journal = {{IEEE} Trans. Cybern.},
volume = {52},
number = {7},
pages = {6119--6130},
year = {2022},
}
@article{GuptaOF16,
author = {Abhishek Gupta and
Yew{-}Soon Ong and
Liang Feng},
title = {Multifactorial Evolution: Toward Evolutionary Multitasking},
journal = {{IEEE} Trans. Evol. Comput.},
volume = {20},
number = {3},
pages = {343--357},
year = {2016},
}
@article{GuptaOFT17,
author = {Abhishek Gupta and
Yew{-}Soon Ong and
Liang Feng and
Kay Chen Tan},
title = {Multiobjective Multifactorial Optimization in Evolutionary Multitasking},
journal = {{IEEE} Trans. Cybern.},
volume = {47},
number = {7},
pages = {1652--1665},
year = {2017},
timestamp = {Sun, 25 Jul 2021 11:39:09 +0200},
biburl = {https://dblp.org/rec/journals/tcyb/GuptaOFT17.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{BaliGOT21,
author = {Kavitesh Kumar Bali and
Abhishek Gupta and
Yew{-}Soon Ong and
Puay Siew Tan},
title = {Cognizant Multitasking in Multiobjective Multifactorial Evolution:
{MO-MFEA-II}},
journal = {{IEEE} Trans. Cybern.},
volume = {51},
number = {4},
pages = {1784--1796},
year = {2021},
timestamp = {Tue, 01 Jun 2021 09:59:48 +0200},
biburl = {https://dblp.org/rec/journals/tcyb/BaliGOT21.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DingYJC19,
author = {Jinliang Ding and
Cuie Yang and
Yaochu Jin and
Tianyou Chai},
title = {Generalized Multitasking for Evolutionary Optimization of Expensive
Problems},
journal = {{IEEE} Trans. Evol. Comput.},
volume = {23},
number = {1},
pages = {44--58},
year = {2019},
timestamp = {Tue, 12 May 2020 16:51:00 +0200},
biburl = {https://dblp.org/rec/journals/tec/DingYJC19.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{JoyRGV19,
author = {Tinu Theckel Joy and
Santu Rana and
Sunil Gupta and
Svetha Venkatesh},
title = {A flexible transfer learning framework for {Bayesian} optimization with convergence guarantee},
journal = {Expert Syst. Appl.},
volume = {115},
pages = {656--672},
year = {2019},
timestamp = {Sat, 19 Oct 2019 19:03:17 +0200},
biburl = {https://dblp.org/rec/journals/eswa/JoyRGV19.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{SnoekRSKSSPPA15,
author = {Jasper Snoek and
Oren Rippel and
Kevin Swersky and
Ryan Kiros and
Nadathur Satish and
Narayanan Sundaram and
Md. Mostofa Ali Patwary and
Prabhat and
Ryan P. Adams},
title = {Scalable {Bayesian} Optimization Using Deep Neural Networks},
booktitle = {ICML'15: Proc. of the 2015 International Conference on Machine Learning},
volume = {37},
pages = {2171--2180},
publisher = {JMLR.org},
year = {2015},
timestamp = {Wed, 29 May 2019 08:41:45 +0200},
biburl = {https://dblp.org/rec/conf/icml/SnoekRSKSSPPA15.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{YuanSASH16,
author={Yuan, Yuan and Ong, Yew-Soon and Gupta, Abhishek and Tan, Puay Siew and Xu, Hua},
title={Evolutionary multitasking in permutation-based combinatorial optimization problems: Realization with TSP, QAP, LOP, and JSP},
booktitle={2016 IEEE Region 10 Conference (TENCON)},
pages={3157--3164},
year={2016},
organization={IEEE}
}
@misc{gpyopt2016,
author = {The GPyOpt authors},
title = {{GPyOpt}: A Bayesian Optimization framework in python},
year = {2016}
}
@article{FengZGZZTQ21,
author = {Liang Feng and
Lei Zhou and
Abhishek Gupta and
Jinghui Zhong and
Zexuan Zhu and
Kay Chen Tan and
Alex Kai Qin},
title = {Solving Generalized Vehicle Routing Problem With Occasional Drivers
via Evolutionary Multitasking},
journal = {{IEEE} Trans. Cybern.},
volume = {51},
number = {6},
pages = {3171--3184},
year = {2021},
timestamp = {Tue, 01 Jun 2021 09:59:45 +0200},
biburl = {https://dblp.org/rec/journals/tcyb/FengZGZZTQ21.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{CollbergP16,
author = {Christian S. Collberg and
Todd A. Proebsting},
title = {Repeatability in computer systems research},
journal = {Commun. {ACM}},
volume = {59},
number = {3},
pages = {62--69},
year = {2016},
timestamp = {Tue, 06 Nov 2018 12:51:42 +0100},
biburl = {https://dblp.org/rec/journals/cacm/CollbergP16.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{LWFSMP94,
title={Fast folding and comparison of RNA secondary structures},
author={Hofacker, Ivo L and Fontana, Walter and Stadler, Peter F and Bonhoeffer, L Sebastian and Tacker, Manfred and Schuster, Peter and others},
journal={Monatshefte f{\"u}r Chemie},
volume={125},
pages={167--188},
year={1994},
publisher={Springer}
}
@article{Carl83,
title = {Molecular Technology: Designing Proteins and Peptides},
author = {Pabo, Carl},
journal = {Nature},
volume = {301},
number = {5897},
pages = {200},
year = {1983},
}
@article{Stephen21,
title = {{RCSB Protein Data Bank: Powerful New Tools for Exploring 3D Structures of Biological Macromolecules for Basic and Applied Research and Education in Fundamental Biology, Biomedicine, Biotechnology, Bioengineering, and Energy Sciences}},
author = {Burley, Stephen K. and Bhikadiya, Charmi and Bi, Chunxiao and Bittrich, Sebastian and Chen, Li and Crichlow, Gregg V. and Christie, Cole H. and Dalenberg, Kenneth and Di Costanzo, Luigi and Duarte, Jose M. and others},
journal = {Nucleic Acids Research},
volume = {49},
number = {D1},
pages = {D437--D451},
year = {2021},
publisher = {Oxford University Press},
}
@article{Christine97,
title = {{CATH--A Hierarchic Classification of Protein Domain Structures}},
author = {Orengo, Christine A. and Michie, Alex D. and Jones, Susan and Jones, David T. and Swindells, Mark B. and Thornton, Janet M.},
journal = {Structure},
volume = {5},
number = {8},
pages = {1093--1109},
year = {1997},
publisher = {Elsevier},
}
@article{Yang05,
title = {{TM-align: A Protein Structure Alignment Algorithm Based on the TM-score}},
author = {Zhang, Yang and Skolnick, Jeffrey},
journal = {Nucleic Acids Research},
volume = {33},
number = {7},
pages = {2302--2309},
year = {2005},
publisher = {Oxford University Press},
}
@article{ChandraGOG18,
author = {Rohitash Chandra and
Abhishek Gupta and
Yew{-}Soon Ong and
Chi{-}Keong Goh},
title = {Evolutionary Multi-task Learning for Modular Knowledge Representation
in Neural Networks},
journal = {Neural Process. Lett.},
volume = {47},
number = {3},
pages = {993--1009},
year = {2018},
timestamp = {Thu, 14 Oct 2021 09:37:11 +0200},
biburl = {https://dblp.org/rec/journals/npl/ChandraGOG18.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{BaoQSBYLC18,
author = {Liang Bao and
Yutao Qi and
Mengqing Shen and
Xiaoxuan Bu and
Jusheng Yu and
Qian Li and
Ping Chen},
title = {An Evolutionary Multitasking Algorithm for Cloud Computing Service
Composition},
booktitle = {Services - {SERVICES} 2018 - 14th World Congress, Held as Part of
the Services Conference Federation, {SCF} 2018, Seattle, WA, USA,
June 25-30, 2018, Proceedings},
series = {Lecture Notes in Computer Science},
volume = {10975},
pages = {130--144},
publisher = {Springer},
year = {2018},
timestamp = {Tue, 14 May 2019 10:00:53 +0200},
biburl = {https://dblp.org/rec/conf/services2/BaoQSBYLC18.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{PerroneJSA18,
author = {Valerio Perrone and
Rodolphe Jenatton and
Matthias W. Seeger and
C{\'{e}}dric Archambeau},
title = {Scalable Hyperparameter Transfer Learning},
booktitle = {NIPS'18: Proc of the 2018 Advances in Neural Information Processing Systems},
pages = {6846--6856},
year = {2018},
timestamp = {Mon, 16 May 2022 15:41:51 +0200},
biburl = {https://dblp.org/rec/conf/nips/PerroneJSA18.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{SchillingWDS15,
author = {Nicolas Schilling and
Martin Wistuba and
Lucas Drumond and
Lars Schmidt{-}Thieme},
title = {Hyperparameter Optimization with Factorized Multilayer Perceptrons},
booktitle = {ECML/PKDD'15: Proc. of the 2015 Machine Learning and Knowledge Discovery in Databases - European Conference},
volume = {9285},
pages = {87--103},
publisher = {Springer},
year = {2015},
timestamp = {Mon, 30 Nov 2020 08:47:26 +0100},
biburl = {https://dblp.org/rec/conf/pkdd/SchillingWDS15.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{Broder97,
author = {Andrei Z. Broder},
editor = {Bruno Carpentieri and
Alfredo De Santis and
Ugo Vaccaro and
James A. Storer},
title = {On the resemblance and containment of documents},
booktitle = {Compression and Complexity of {SEQUENCES}},
pages = {21--29},
publisher = {{IEEE}},
year = {1997},
timestamp = {Wed, 16 Oct 2019 14:14:56 +0200},
biburl = {https://dblp.org/rec/conf/sequences/Broder97.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{WangKK18,
author = {Zi Wang and
Beomjoon Kim and
Leslie Pack Kaelbling},
title = {Regret bounds for meta {Bayesian} optimization with an unknown {Gaussian}
process prior},
booktitle = {NIPS'18: Proc of the 2018 Advances in Neural Information Processing Systems},
pages = {10498--10509},
year = {2018},
timestamp = {Thu, 17 Nov 2022 14:05:51 +0100},
biburl = {https://dblp.org/rec/conf/nips/WangKK18.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{LawZCHS19,
author = {Ho Chung Leon Law and
Peilin Zhao and
Leung Sing Chan and
Junzhou Huang and
Dino Sejdinovic},
title = {Hyperparameter Learning via Distributional Transfer},
booktitle = {NIPS'19: Proc of the 2019 Advances in Neural Information Processing Systems Annual Conference},
pages = {6801--6812},
year = {2019},
timestamp = {Mon, 16 May 2022 15:41:51 +0200},
biburl = {https://dblp.org/rec/conf/nips/LawZCHS19.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{SpringenbergKFH16,
author = {Jost Tobias Springenberg and
Aaron Klein and
Stefan Falkner and
Frank Hutter},
title = {{Bayesian} Optimization with Robust {Bayesian} Neural Networks},
booktitle = {NIPS'16: Proc of the 2016 Advances in Neural Information Processing Systems},
pages = {4134--4142},
year = {2016},
timestamp = {Mon, 16 May 2022 15:41:51 +0200},
biburl = {https://dblp.org/rec/conf/nips/SpringenbergKFH16.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{HutterHL11,
author = {Frank Hutter and
Holger H. Hoos and
Kevin Leyton{-}Brown},
title = {Sequential Model-Based Optimization for General Algorithm Configuration},
booktitle = {LION'11: Proc. of the 2011 Learning and Intelligent Optimization},
volume = {6683},
pages = {507--523},
publisher = {Springer},
year = {2011},
timestamp = {Sun, 02 Jun 2019 21:10:54 +0200},
biburl = {https://dblp.org/rec/conf/lion/HutterHL11.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{BardenetBKS13,
author = {R{\'{e}}mi Bardenet and
M{\'{a}}ty{\'{a}}s Brendel and
Bal{\'{a}}zs K{\'{e}}gl and
Mich{\`{e}}le Sebag},
title = {Collaborative hyperparameter tuning},
booktitle = {ICML'13: Proc of the 2013 International Conference on Machine Learning},
volume = {28},
pages = {199--207},
publisher = {JMLR.org},
year = {2013},
timestamp = {Wed, 29 May 2019 08:41:45 +0200},
biburl = {https://dblp.org/rec/conf/icml/BardenetBKS13.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{PfahringerBG00,
author = {Bernhard Pfahringer and
Hilan Bensusan and
Christophe G. Giraud{-}Carrier},
title = {Meta-Learning by Landmarking Various Learning Algorithms},
booktitle = {ICML'00: Proc of the 17th International Conference on Machine Learning},
pages = {743--750},
publisher = {Morgan Kaufmann},
year = {2000},
timestamp = {Sun, 21 Feb 2010 20:54:50 +0100},
biburl = {https://dblp.org/rec/conf/icml/PfahringerBG00.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{YogatamaM14,
author = {Dani Yogatama and
Gideon Mann},
title = {Efficient Transfer Learning Method for Automatic Hyperparameter Tuning},
booktitle = {AISTATS'14: Proc of the 2014 International Conference on Artificial Intelligence and Statistics},
volume = {33},
pages = {1077--1085},
publisher = {JMLR.org},
year = {2014},
timestamp = {Wed, 29 May 2019 08:41:44 +0200},
biburl = {https://dblp.org/rec/conf/aistats/YogatamaM14.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{PoloczekWF17,
author = {Matthias Poloczek and
Jialei Wang and
Peter I. Frazier},
title = {Multi-Information Source Optimization},
booktitle = {NIPS'17: Proc of the 2017 Annual Conference on Neural Information Processing Systems},
pages = {4288--4298},
year = {2017},
timestamp = {Thu, 21 Jan 2021 15:15:21 +0100},
biburl = {https://dblp.org/rec/conf/nips/PoloczekWF17.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{WistubaSS16,
author = {Martin Wistuba and
Nicolas Schilling and
Lars Schmidt{-}Thieme},
title = {Two-Stage Transfer Surrogate Model for Automatic Hyperparameter Optimization},
booktitle = {ECML/PKDD'16: Proc. of the 2016 Machine Learning and Knowledge Discovery in Databases},
volume = {9851},
pages = {199--214},
publisher = {Springer},
year = {2016},
timestamp = {Thu, 05 Dec 2019 17:07:16 +0100},
biburl = {https://dblp.org/rec/conf/pkdd/WistubaSS16.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{FeurerBE15,
author = {Matthias Feurer and
Benjamin Letham and
Eytan Bakshy},
title = {Scalable meta-learning for {Bayesian} optimization using ranking-weighted {Gaussian} process ensembles},
booktitle = {ICML 2018 AutoML Workshop},
volume = {7},
pages = {1--15},
year = {2018}
}
@article{Javidian19,
author = {Mohammad Ali Javidian and
Pooyan Jamshidi and
Marco Valtorta},
title = {Transfer Learning for Performance Modeling of Configurable Systems:
{A} Causal Analysis},
journal = {CoRR},
volume = {abs/1902.10119},
year = {2019},
eprinttype = {arXiv},
eprint = {1902.10119},
timestamp = {Tue, 21 May 2019 18:03:38 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-1902-10119.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{ZhuH23,
author = {Mingxuan Zhu and
Dan Hao},
title = {Compiler Auto-Tuning via Critical Flag Selection},
booktitle = {ASE'23: Proc. of the 2023 {IEEE/ACM} International Conference on Automated Software Engineering},
pages = {1000--1011},
publisher = {{IEEE}},
year = {2023},
timestamp = {Thu, 16 Nov 2023 09:03:51 +0100},
biburl = {https://dblp.org/rec/conf/kbse/ZhuH23.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{Nair0MSA20,
author = {Vivek Nair and
Zhe Yu and
Tim Menzies and
Norbert Siegmund and
Sven Apel},
title = {Finding Faster Configurations Using {FLASH}},
journal = {{IEEE} Trans. Software Eng.},
volume = {46},
number = {7},
pages = {794--811},
year = {2020},
timestamp = {Fri, 31 Jul 2020 17:07:30 +0200},
biburl = {https://dblp.org/rec/journals/tse/Nair0MSA20.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{RothfussHCK21,
author = {Jonas Rothfuss and
Dominique Heyn and
Jinfan Chen and
Andreas Krause},
title = {Meta-Learning Reliable Priors in the Function Space},
booktitle = {NIPS'21: Proc of the 2021 Advances in Neural Information Processing Systems},
pages = {280--293},
year = {2021},
timestamp = {Tue, 03 May 2022 16:20:46 +0200},
biburl = {https://dblp.org/rec/conf/nips/RothfussHCK21.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{ChenXCZ21,
author = {Junjie Chen and
Ningxin Xu and
Peiqi Chen and
Hongyu Zhang},
title = {Efficient Compiler Autotuning via {Bayesian} Optimization},
booktitle = {ICSE'21: Proc of the 43rd {IEEE/ACM} International Conference on Software Engineering},
pages = {1198--1209},
publisher = {{IEEE}},
year = {2021},
timestamp = {Mon, 03 Jan 2022 22:27:59 +0100},
biburl = {https://dblp.org/rec/conf/icse/0003XC021.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DalibardSY17,
author = {Valentin Dalibard and
Michael Schaarschmidt and
Eiko Yoneki},
title = {{BOAT:} Building Auto-Tuners with Structured {Bayesian} Optimization},
booktitle = {WWW'17: Proc of the 2017 International Conference on World Wide Web,
{WWW} 2017, Perth, Australia, April 3-7, 2017},
pages = {479--488},
publisher = {{ACM}},
year = {2017},
timestamp = {Tue, 06 Nov 2018 16:57:08 +0100},
biburl = {https://dblp.org/rec/conf/www/DalibardSY17.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{YaoWHXHL21,
author = {Huaxiu Yao and
Ying Wei and
Long{-}Kai Huang and
Ding Xue and
Junzhou Huang and
Zhenhui Li},
title = {Functionally Regionalized Knowledge Transfer for Low-resource Drug
Discovery},
booktitle = {NIPS'21: Proc of the 2021 Advances in Neural Information Processing Systems},
pages = {8256--8268},
year = {2021},
timestamp = {Tue, 03 May 2022 16:20:47 +0200},
biburl = {https://dblp.org/rec/conf/nips/YaoWHXHL21.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{ErikssonPGTP19,
author = {David Eriksson and
Michael Pearce and
Jacob R. Gardner and
Ryan Turner and
Matthias Poloczek},
title = {Scalable Global Optimization via Local {Bayesian} Optimization},
booktitle = {NIPS'19: Proc of the 32nd Annual Conference on Neural Information Processing Systems},
pages = {5497--5508},
year = {2019},
timestamp = {Mon, 16 May 2022 15:41:51 +0200},
biburl = {https://dblp.org/rec/conf/nips/ErikssonPGTP19.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{PfistererSMBB22,
author = {Florian Pfisterer and
Lennart Schneider and
Julia Moosbauer and
Martin Binder and
Bernd Bischl},
title = {{YAHPO} Gym - An Efficient Multi-Objective Multi-Fidelity Benchmark
for Hyperparameter Optimization},
booktitle = {AutoML'22: Proc of the 2022 International Conference on Automated Machine Learning},
series = {Proceedings of Machine Learning Research},
volume = {188},
pages = {3/1--39},
publisher = {{PMLR}},
year = {2022},
timestamp = {Mon, 28 Nov 2022 12:30:36 +0100},
biburl = {https://dblp.org/rec/conf/automl/PfistererSMBB22.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{hirose2021bench,
title={{NAS-HPO-Bench-II}: A Benchmark Dataset on Joint Optimization of Convolutional Neural Network Architecture and Training Hyperparameters},
author={Hirose, Yoichi and Yoshinari, Nozomu and Shirakawa, Shinichi},
booktitle={Proceedings of the 13th Asian Conference on Machine Learning},
year={2021}
}
@inproceedings{duan2021transnas,
title = {TransNAS-Bench-101: Improving Transferability and Generalizability of Cross-Task Neural Architecture Search},
author = {Duan, Yawen and Chen, Xin and Xu, Hang and Chen, Zewei and Liang, Xiaodan and Zhang, Tong and Li, Zhenguo},
booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
pages = {5251--5260},
year = {2021}
}
@inproceedings{EggenspergerMMF21,
author = {Katharina Eggensperger and
Philipp M{\"{u}}ller and
Neeratyoy Mallik and
Matthias Feurer and
Ren{\'{e}} Sass and
Aaron Klein and
Noor H. Awad and
Marius Lindauer and
Frank Hutter},
title = {HPOBench: {A} Collection of Reproducible Multi-Fidelity Benchmark
Problems for {HPO}},
booktitle = {NIPS'21: Proc of the 2021 Neural Information Processing Systems Track on Datasets and Benchmarks 1},
year = {2021},
timestamp = {Thu, 05 May 2022 16:53:59 +0200},
biburl = {https://dblp.org/rec/conf/nips/EggenspergerMMF21.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{LiJDRT17,
author = {Lisha Li and
Kevin G. Jamieson and
Giulia DeSalvo and
Afshin Rostamizadeh and
Ameet Talwalkar},
title = {Hyperband: Bandit-Based Configuration Evaluation for Hyperparameter
Optimization},
booktitle = {ICLR'17: Proc of the 2017 International Conference on Learning Representations},
publisher = {OpenReview.net},
year = {2017},
timestamp = {Thu, 25 Jul 2019 14:26:05 +0200},
biburl = {https://dblp.org/rec/conf/iclr/LiJDRT17.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{LindauerH18,
author = {Marius Lindauer and
Frank Hutter},
editor = {Sheila A. McIlraith and
Kilian Q. Weinberger},
title = {Warmstarting of Model-Based Algorithm Configuration},
booktitle = {AAAI'18: Proc of the 2018 {AAAI} Conference on Artificial Intelligence},
pages = {1355--1362},
publisher = {{AAAI} Press},
year = {2018},
timestamp = {Mon, 04 Sep 2023 16:50:25 +0200},
biburl = {https://dblp.org/rec/conf/aaai/LindauerH18.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{SrinivasKKS10,
author = {Niranjan Srinivas and
Andreas Krause and
Sham M. Kakade and
Matthias W. Seeger},
title = {{Gaussian} Process Optimization in the Bandit Setting: No Regret and
Experimental Design},
booktitle = {ICML'10: Proc. of the 27th International Conference on Machine Learning},
pages = {1015--1022},
publisher = {Omnipress},
year = {2010},
timestamp = {Tue, 23 Jul 2019 15:03:10 +0200},
biburl = {https://dblp.org/rec/conf/icml/SrinivasKKS10.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{Tantithamthavorn19,
author = {Chakkrit Tantithamthavorn and
Shane McIntosh and
Ahmed E. Hassan and
Kenichi Matsumoto},
title = {The Impact of Automated Parameter Optimization on Defect Prediction
Models},
journal = {{IEEE} Trans. Software Eng.},
volume = {45},
number = {7},
pages = {683--711},
year = {2019},
timestamp = {Thu, 08 Aug 2019 11:07:40 +0200},
biburl = {https://dblp.org/rec/journals/tse/Tantithamthavorn19.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{Wilcoxon1945IndividualCB,
author = {Frank Wilcoxon},
title = {Individual Comparisons by Ranking Methods},
journal = {Biometrics Bulletin},
volume = {1},
number = {6},
pages = {80--83},
year = {1945}
}
@article{VarghaD00,
author = {Andr{\'a}s Vargha and
Harold D. Delaney},
title = {A Critique and Improvement of the CL Common Language Effect Size Statistics of McGraw and Wong},
journal = {J. Educ. Behav. Stat.},
volume = {25},
number = {2},
pages = {101--132},
year = {2000}
}
@article{HennigS12,
author = {Philipp Hennig and
Christian J. Schuler},
title = {Entropy Search for Information-Efficient Global Optimization},
journal = {J. Mach. Learn. Res.},
volume = {13},
pages = {1809--1837},
year = {2012},
timestamp = {Thu, 02 Jun 2022 13:58:57 +0200},
biburl = {https://dblp.org/rec/journals/jmlr/HennigS12.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{JonesSW98,
author = {Donald R. Jones and
Matthias Schonlau and
William J. Welch},
title = {Efficient Global Optimization of Expensive Black-Box Functions},
journal = {J. Glob. Optim.},
volume = {13},
number = {4},
pages = {455--492},
year = {1998},
timestamp = {Fri, 11 Sep 2020 13:04:22 +0200},
biburl = {https://dblp.org/rec/journals/jgo/JonesSW98.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{Cowen-Rivers2022,
author = {Cowen-Rivers, Alexander and Lyu, Wenlong and Tutunov, Rasul and Wang, Zhi and Grosnit, Antoine and Griffiths, Ryan-Rhys and Maravel, Alexandre and Hao, Jianye and Wang, Jun and Peters, Jan and Bou Ammar, Haitham},
year = {2022},
month = {07},
title = {HEBO: Pushing The Limits of Sample-Efficient Hyperparameter Optimisation},
volume = {74},
journal = {Journal of Artificial Intelligence Research}
}
@article{BalandatKJDLB,
author = {Maximilian Balandat and
Brian Karrer and
Daniel R. Jiang and
Samuel Daulton and
Benjamin Letham and
Andrew Gordon Wilson and
Eytan Bakshy},
title = {BoTorch: Programmable {Bayesian} Optimization in {PyTorch}},
journal = {CoRR},
volume = {abs/1910.06403},
year = {2019},
eprinttype = {arXiv},
eprint = {1910.06403},
timestamp = {Wed, 16 Oct 2019 16:25:53 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-1910-06403.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{AwadMH21,
author = {Noor H. Awad and
Neeratyoy Mallik and
Frank Hutter},
editor = {Zhi{-}Hua Zhou},
title = {{DEHB:} Evolutionary Hyberband for Scalable, Robust and Efficient
Hyperparameter Optimization},
booktitle = {IJCAI'21: Proc. of the 2021 International Joint Conference on Artificial Intelligence},
pages = {2147--2153},
publisher = {ijcai.org},
year = {2021},
timestamp = {Wed, 25 Aug 2021 17:11:16 +0200},
biburl = {https://dblp.org/rec/conf/ijcai/AwadMH21.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{GPflowOpt2017,
author = {Knudde, Nicolas and {van der Herten}, Joachim and Dhaene, Tom and Couckuyt, Ivo},
title = "{{GP}flow{O}pt: {A} {B}ayesian {O}ptimization {L}ibrary using Tensor{F}low}",
journal = {arXiv preprint -- arXiv:1711.03845},
year = {2017},
}
@inproceedings{WistubaSS15b,
author = {Martin Wistuba and
Nicolas Schilling and
Lars Schmidt{-}Thieme},
title = {Hyperparameter Search Space Pruning - A New Component for Sequential Model-Based Hyperparameter Optimization},
booktitle = {ECML/PKDD'15: Proc of the 2015 Advances in Machine Learning and Knowledge Discovery in Databases},
volume = {9285},
pages = {104--119},
year = {2015},
timestamp = {Mon, 30 Nov 2020 08:47:26 +0100},
biburl = {https://dblp.org/rec/conf/pkdd/WistubaSS15.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{PerroneS19,
author = {Valerio Perrone and
Huibin Shen},
title = {Learning search spaces for {Bayesian} optimization: Another view of
hyperparameter transfer learning},
booktitle = {NIPS'19: Proc of the 2019 Advances in Neural Information Processing Systems},
pages = {12751--12761},
year = {2019},
timestamp = {Mon, 16 May 2022 15:41:51 +0200},
biburl = {https://dblp.org/rec/conf/nips/PerroneS19.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{ChenSLW0DKKDRPF22,
author = {Yutian Chen and
Xingyou Song and
Chansoo Lee and
Zi Wang and
Richard Zhang and
David Dohan and
Kazuya Kawakami and
Greg Kochanski and
Arnaud Doucet and
Marc'Aurelio Ranzato and
Sagi Perel and
Nando de Freitas},
title = {Towards Learning Universal Hyperparameter Optimizers with Transformers},
booktitle = {NIPS'22: Proc of the 2022 Advances in Neural Information Processing Systems},
year = {2022},
timestamp = {Thu, 11 May 2023 17:08:22 +0200},
biburl = {https://dblp.org/rec/conf/nips/ChenSLW0DKKDRPF22.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{BergstraBBK11,
author = {James Bergstra and
R{\'{e}}mi Bardenet and
Yoshua Bengio and
Bal{\'{a}}zs K{\'{e}}gl},
title = {Algorithms for Hyper-Parameter Optimization},
booktitle = {NIPS'11: Proc. of the 2011 Advances in Neural Information Processing Systems},
pages = {2546--2554},
year = {2011},
timestamp = {Mon, 16 May 2022 15:41:51 +0200},
biburl = {https://dblp.org/rec/conf/nips/BergstraBBK11.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{MullerFHH23,
author = {Samuel M{\"{u}}ller and
Matthias Feurer and
Noah Hollmann and
Frank Hutter},
title = {PFNs4BO: In-Context Learning for {Bayesian} Optimization},
booktitle = {ICML'23: Proc of the International Conference on Machine Learning},
volume = {202},
pages = {25444--25470},
publisher = {{PMLR}},
year = {2023},
timestamp = {Mon, 28 Aug 2023 17:23:08 +0200},
biburl = {https://dblp.org/rec/conf/icml/0005FHH23.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@book{RasmussenW06,
author = {Carl Edward Rasmussen and
Christopher K. I. Williams},
title = {{Gaussian} processes for machine learning},
publisher = {{MIT} Press},
year = {2006},
isbn = {026218253X},
timestamp = {Wed, 26 Apr 2017 17:48:08 +0200},
biburl = {https://dblp.org/rec/bib/books/lib/RasmussenW06},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@book{Garnett23,
author = {Roman Garnett},
title = {{Bayesian} Optimization},
publisher = {Cambridge University Press},
year = {2023},
month = {January},
isbn = {9781108348973}
}
@book{johnson1985,
title={The critical difference: Essays in the contemporary rhetoric of reading},
author={Johnson, Barbara},
year={1985},
publisher={JHU Press}
}
@book{MichieST94,
author = {Donald Michie and
David J. Spiegelhalter and
Charles C. Taylor},
title = {Machine Learning, Neural and Statistical Classification},
publisher = {Ellis Horwood},
year = {1994},
biburl = {https://dblp.org/rec/books/eh/MichieST94.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@book{LeskovecRU14,
author = {Jure Leskovec and
Anand Rajaraman and
Jeffrey D. Ullman},
title = {Mining of Massive Datasets, 2nd Ed},
publisher = {Cambridge University Press},
year = {2014},
isbn = {978-1107077232},
timestamp = {Wed, 10 Jul 2019 10:47:04 +0200},
biburl = {https://dblp.org/rec/books/cu/LeskovecRU14.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{TuRKSST22,
author = {Renbo Tu and
Nicholas Roberts and
Mikhail Khodak and
Junhong Shen and
Frederic Sala and
Ameet Talwalkar},
title = {NAS-Bench-360: Benchmarking Neural Architecture Search on Diverse
Tasks},
booktitle = {NIPS'22: Proc. of the 2022 Annual Conference on Neural Information Processing Systems},
year = {2022},
timestamp = {Mon, 08 Jan 2024 16:31:37 +0100},
biburl = {https://dblp.org/rec/conf/nips/TuRKSST22.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{FreundS95,
author = {Yoav Freund and
Robert E. Schapire},
title = {A decision-theoretic generalization of on-line learning and an application
to boosting},
booktitle = {EuroCOLT'95: Proc. of the Second European Conference on Computational Learning Theory},
series = {Lecture Notes in Computer Science},
volume = {904},
pages = {23--37},
publisher = {Springer},
year = {1995},
timestamp = {Tue, 14 May 2019 10:00:53 +0200},
biburl = {https://dblp.org/rec/conf/eurocolt/FreundS95.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{JSSv033i01,
author = {Friedman, Jerome H. and Hastie, Trevor and Tibshirani, Rob},
title = {Regularization Paths for Generalized Linear Models via Coordinate Descent},
volume = {33},
number = {1},
journal = {Journal of Statistical Software},
year = {2010},
pages = {1--22}
}
@article{ShankerBHK23,
author = {Shanker, Varun R. and Bruun, Theodora U.J. and Hie, Brian L. and Kim, Peter S.},
title = {Inverse folding of protein complexes with a structure-informed language model enables unsupervised antibody evolution},
year = {2023},
publisher = {Cold Spring Harbor Laboratory},
journal = {bioRxiv}
}
@phdthesis{Neal95,
author = {Radford M. Neal},
title = {Bayesian learning for neural networks},
school = {University of Toronto, Canada},
year = {1995},
timestamp = {Wed, 10 Aug 2022 16:24:08 +0200},
biburl = {https://dblp.org/rec/phd/ca/Neal95.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@misc{Dua19,
author = {Dheeru Dua and Casey Graff},
title = {UCI Machine Learning Repository},
year = {2019},
url = {http://archive.ics.uci.edu/ml},
institution = {University of California, Irvine, School of Information and Computer Sciences}
}
@misc{ROBERTGA21,
author = {Robert, Philippe and Greiff, Victor and Akbar, Rahmad},
title = {Absolut! in silico antibody-antigen binding database},
publisher = {Archive2014},
year = {2021}
}
================================================
FILE: docs/source/usage/algorithms.rst
================================================
Algorithmic objects
===================
.. admonition:: Overview
:class: info
- :ref:`Register `: How to register a new algorithmic object in :ref:`TransOPT `
- :ref:`Supported Algorithms `: The list of algorithms available in :ref:`TransOPT `
- :ref:`Algorithmic Objects`: The types of algorithmic objects supported by :ref:`TransOPT `
.. _register-new-algorithm:
Registering a New Algorithm in TransOPT
---------------------------------------
To register a new algorithm object in TransOPT, follow the steps outlined below:
1. **Import the Model Registry**
First, you need to import the `model_registry` from the `transopt.agent.registry` module:
.. code-block:: python
from transopt.agent.registry import model_registry
2. **Define the Algorithm Object Name**
Next, use the registry to define the name of your algorithm object. For example:
.. code-block:: python
@model_registry.register("MHGP")
class MHGP(Model):
pass
In this example, the algorithm object is named "MHGP".
3. **Choose the Appropriate Base Class**
Depending on the type of algorithm object you are creating, you must inherit from a specific base class. TransOPT provides several algorithm modules, each corresponding to a different base class:
- **Surrogate Model**: Inherit from the `Model` class.
- **Initialization Design**: Inherit from the `Sampler` class.
- **Acquisition Function**: Inherit from the `AcquisitionBase` class.
- **Pretrain Module**: Inherit from the `PretrainBase` class.
- **Normalizer Module**: Inherit from the `NormalizerBase` class.
For instance, in the example provided, we are creating a surrogate model, so the `MHGP` class inherits from the `Model` base class.
4. **Implement the Required Abstract Methods**
Once the class is defined, you need to implement several abstract methods that are required by the `Model` base class. These methods include:
.. code-block:: python

   from typing import List, Sequence, Tuple, Union

   import numpy as np

   def meta_fit(
       self,
       source_X: List[np.ndarray],
       source_Y: List[np.ndarray],
       optimize: Union[bool, Sequence[bool]] = True,
   ):
       pass

   def fit(
       self,
       X: np.ndarray,
       Y: np.ndarray,
       optimize: bool = False,
   ):
       pass

   def predict(
       self, X: np.ndarray, return_full: bool = False, with_noise: bool = False
   ) -> Tuple[np.ndarray, np.ndarray]:
       pass
- **meta_fit**: This method fits the model on meta-data from source tasks. If your transfer optimization algorithm requires meta-data, this is where you should leverage it.
- **fit**: This method fits the model on the observations of the current task.
- **predict**: This method returns the predictive mean and variance at the query points ``X``.
By following these steps, you can successfully register a new algorithm object in TransOPT and implement the necessary functionality to integrate it into the framework.
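The registry pattern behind these steps can be sketched with a minimal stand-in (a simplified illustration; the real `model_registry` in ``transopt.agent.registry`` is more featureful):

```python
from typing import Callable, Dict, Type

class Registry:
    """Minimal stand-in for TransOPT's model registry (illustrative only)."""

    def __init__(self) -> None:
        self._objects: Dict[str, Type] = {}

    def register(self, name: str) -> Callable[[Type], Type]:
        def decorator(cls: Type) -> Type:
            self._objects[name] = cls   # map the chosen name to the class
            return cls
        return decorator

    def get(self, name: str) -> Type:
        return self._objects[name]

model_registry = Registry()

@model_registry.register("MHGP")
class MHGP:
    def meta_fit(self, source_X, source_Y, optimize=True):
        pass  # would fit on meta-data from source tasks

    def fit(self, X, Y, optimize=False):
        pass  # would fit on observations from the current task

# The decorated class is now retrievable by its registered name.
assert model_registry.get("MHGP") is MHGP
```

The decorator leaves the class itself unchanged, so registration is purely additive: the framework can later look up ``"MHGP"`` by name when assembling an optimizer.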
.. _alg:
Supported Algorithms
--------------------
Search space transform
^^^^^^^^^^^^^^^^^^^^^^
**Hyperparameter Search Space Pruning – A New Component for Sequential Model-Based Hyperparameter Optimization** :cite:`WistubaSS15b`
This method prunes ineffective regions of the hyperparameter search space by using past evaluations to guide the optimization. It identifies areas with low potential by analyzing the performance of sampled configurations and employing a surrogate model to predict future outcomes. Regions that consistently show poor performance or low expected improvement are marked as low potential. The method then updates the search process to focus on more promising regions, thereby improving optimization efficiency and reducing unnecessary evaluations.
**Learning Search Spaces for Bayesian Optimization: Another View of Hyperparameter Transfer Learning** :cite:`PerroneS19`
This method replaces the predefined search space with data-driven geometric representations (e.g., ellipsoids and boxes): it analyzes historical data to identify high-performing regions and fits geometric shapes to those regions. This transformation narrows the search to promising areas, which becomes increasingly beneficial as the dimensionality of the search space grows.
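The box-fitting idea can be sketched as follows (a minimal illustration, not the paper's exact estimator; the function name and the top-fraction heuristic are choices made here):

```python
import numpy as np

def fit_search_box(X: np.ndarray, y: np.ndarray, top_frac: float = 0.2):
    """Axis-aligned box enclosing the best `top_frac` of past configurations.

    X: (n, d) historical configurations; y: (n,) losses (lower is better).
    """
    k = max(1, int(top_frac * len(y)))
    best = X[np.argsort(y)[:k]]          # keep the k best configurations
    return best.min(axis=0), best.max(axis=0)

rng = np.random.default_rng(0)
X = rng.uniform(-5.0, 5.0, size=(200, 2))
y = (X ** 2).sum(axis=1)                 # toy objective with optimum at 0
lo, hi = fit_search_box(X, y)
# (lo, hi) is a much tighter box around the origin than the original [-5, 5].
```

A new Bayesian optimization run would then search only inside ``(lo, hi)`` instead of the full original bounds.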
Initialization Design
^^^^^^^^^^^^^^^^^^^^^^
**Few-Shot Bayesian Optimization with Deep Kernel Surrogates** :cite:`WistubaG21`
This method leverages historical task data and an evolutionary algorithm to provide a warm-start initialization. By selecting hyperparameter settings that minimize a loss function across multiple tasks, the method accelerates optimization with fewer evaluations.
**Initializing Bayesian Hyperparameter Optimization via Meta-Learning** :cite:`FeurerSH15`
This method introduces a meta-learning-based initialization for BO, improving the starting point by leveraging hyperparameter configurations that worked well on similar datasets. These similar datasets are identified through meta-features. The method calculates the distance between datasets using these meta-features, selecting the most similar ones to initialize the optimization process efficiently.
**Learning Hyperparameter Optimization Initializations** :cite:`WistubaSS15a`
This method proposes to use a meta-loss function that is minimized through gradient-based optimization. By optimizing for a meta-loss derived from the response functions of past datasets, it generates entirely new configurations, whereas prior methods limited themselves to reusing configurations in similar datasets.
Surrogate Model
^^^^^^^^^^^^^^^^^^^^^^
**Pre-trained Gaussian Processes for Bayesian Optimization** :cite:`Wang2021`
In this method, the surrogate model is built on a pre-trained GP with data from related tasks. This approach uses a KL divergence-based loss function to pre-train the GP, ensuring it captures similarities between the target function and past data. The pre-trained GP serves as the prior for BO, allowing the model to make better predictions with fewer observations by leveraging the pre-trained knowledge.
**Few-Shot Bayesian Optimization with Deep Kernel Surrogates**
In this method, the surrogate model is a deep kernel Gaussian process that is meta-learned across multiple past tasks. This model enables quick adaptation to new tasks with limited evaluations. The deep kernel, which combines a neural network and a Gaussian process, provides uncertainty estimates, helping the model generalize across diverse tasks while being fine-tuned for new ones.
**Google Vizier: A Service for Black-Box Optimization** :cite:`GolovinSMKKS17`
This method transfers source knowledge by using the posterior mean of the source task as the prior mean for the target task. This approach simplifies the transfer process by ignoring uncertainty from the source model and only leveraging the mean, which leads to reduced computational complexity while still incorporating valuable information from the source task.
**PFNs4BO: In-Context Learning for Bayesian Optimization** :cite:`MullerFHH23`
This method utilizes a Transformer-based architecture called Prior-data Fitted Networks (PFNs). These networks are trained on synthetic datasets to approximate the posterior predictive distribution (PPD) through in-context learning. PFNs can be trained on any efficiently sampled prior distribution, such as Gaussian processes or Bayesian neural networks. By learning from diverse priors, the PFN surrogate model captures complex patterns in the optimization process, allowing it to make accurate predictions while maintaining flexibility to incorporate user-defined priors or handle spurious dimensions effectively.
**Scalable Gaussian process-based transfer surrogates for hyperparameter optimization** :cite:`WistubaSS18`
This method introduces an ensemble of GPs, where each GP is trained on a different past task. The model combines the predictions of the individual GPs in a weighted sum, with weights assigned according to how well each GP predicts the target task; more relevant models receive higher weights.
**Scalable Meta-Learning for Bayesian Optimization using Ranking-Weighted Gaussian Process Ensembles** :cite:`FeurerBE15`
This method introduces Ranking-Weighted Gaussian Process Ensembles (RGPE). Similar to previous approaches, the surrogate model combines an ensemble of GPs. However, in RGPE, the weights are determined using a ranking loss function, which assesses how effectively each GP ranks the observations from the current task. GPs that rank the observations more accurately are assigned higher weights, reflecting their greater relevance to the task at hand.
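The ranking-based weighting can be illustrated with a toy pairwise ranking loss (a sketch in the spirit of RGPE, not the paper's exact weighting scheme; `ranking_loss` is a name chosen here):

```python
import numpy as np

def ranking_loss(pred: np.ndarray, y: np.ndarray) -> int:
    """Count pairs (i, j) whose order under `pred` disagrees with `y`."""
    loss = 0
    n = len(y)
    for i in range(n):
        for j in range(i + 1, n):
            if (pred[i] < pred[j]) != (y[i] < y[j]):
                loss += 1
    return loss

y = np.array([0.1, 0.5, 0.3, 0.9])            # target-task observations
good_model = np.array([0.0, 0.6, 0.2, 1.0])   # preserves the ordering of y
bad_model = y[::-1].copy()                     # reverses the ordering
assert ranking_loss(good_model, y) < ranking_loss(bad_model, y)
```

A source GP with a low ranking loss on the target observations would receive a correspondingly high weight in the ensemble.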
**Multi-Task Bayesian Optimization**:cite:`SwerskySA13`
This method uses multi-task Gaussian processes (MTGP) as the surrogate model. It trains a GP for each task and uses a shared covariance structure across tasks to improve predictive accuracy. By leveraging the relationships between tasks, the MTGP reduces the need for independent function evaluations, making the optimization process faster and more efficient.
**Multi-Fidelity Bayesian Optimization via Deep Neural Networks** :cite:`LiXKZ20`
In this method, the surrogate model employs a deep neural network designed to handle multi-fidelity optimization tasks. The DNN surrogate models each fidelity with a neural network, and higher fidelities are conditioned on the outputs from lower fidelities. By stacking neural networks for each fidelity level, the model captures nonlinear relationships between different fidelities. This structure allows the surrogate to propagate information across fidelities, improving the accuracy of function estimation at higher fidelities while reducing computational costs.
**BOHB: Robust and Efficient Hyperparameter Optimization at Scale** :cite:`FalknerKH18`
In this method, the surrogate model uses a Tree-structured Parzen Estimator (TPE) to model the hyperparameter space. TPE builds separate probability models for good and bad configurations using kernel density estimation. The TPE model guides the search by maximizing the ratio between these models, effectively focusing on promising regions of the search space.
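The density-ratio idea behind TPE can be sketched in one dimension (illustrative only; BOHB's actual estimator is multivariate and budget-aware, and the bandwidth here is an arbitrary choice):

```python
import numpy as np

def kde(samples: np.ndarray, x: np.ndarray, bw: float = 0.1) -> np.ndarray:
    """Plain Gaussian kernel density estimate with a fixed, hand-picked bandwidth."""
    diffs = (x[:, None] - samples[None, :]) / bw
    return np.exp(-0.5 * diffs ** 2).mean(axis=1) / (bw * np.sqrt(2 * np.pi))

rng = np.random.default_rng(1)
X = rng.uniform(0.0, 1.0, 100)           # observed configurations
y = (X - 0.3) ** 2                       # toy objective, optimum at x = 0.3
cut = np.quantile(y, 0.25)               # best quartile counts as "good"
good, bad = X[y <= cut], X[y > cut]

cands = np.linspace(0.0, 1.0, 201)
score = kde(good, cands) / (kde(bad, cands) + 1e-12)
best = cands[np.argmax(score)]           # the ratio peaks near the optimum
```

Maximizing the ratio of the "good" density to the "bad" density steers proposals toward the region where low objective values were observed.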
Acquisition Function
^^^^^^^^^^^^^^^^^^^^
**Scalable Meta-Learning for Bayesian Optimization using Ranking-Weighted Gaussian Process Ensembles**
In RGPE, the acquisition function follows standard BO methods but integrates the ranking-weighted ensemble model. The ensemble combines predictions from multiple GPs, each weighted based on its ranking performance in relation to the current task. The acquisition function then uses this weighted ensemble to balance exploration and exploitation, ensuring that the most relevant past models are given greater influence when selecting the next point to evaluate.
**Scalable Gaussian process-based transfer surrogates for hyperparameter optimization**
This approach is referred to as the *transfer acquisition function* (TAF). The acquisition function balances exploration and exploitation by combining the predicted improvement from the new data with predicted improvements from previous tasks, weighted by their relevance. The weights are computed in the same way as for the surrogate model.
**Multi-Task Bayesian Optimization**
In this method, the acquisition function extends the standard EI criterion to the multi-task setting. It dynamically selects which task to evaluate by considering the correlation between tasks. The acquisition function maximizes information gain per unit cost by balancing the evaluation of cheaper auxiliary tasks with more expensive primary tasks, using the entropy search strategy.
**Multi-Fidelity Bayesian Optimization via Deep Neural Networks**
This acquisition function maximizes the mutual information between the predicted maximum of the objective function and the next point to be evaluated. It selects the input location and fidelity level that provide the highest benefit-cost ratio. By employing fidelity-wise moment matching and Gauss-Hermite quadrature to approximate the posterior distributions, it ensures that both fidelity selection and input sampling are computationally efficient and well-informed.
**BOHB: Robust and Efficient Hyperparameter Optimization at Scale**
It selects new configurations by maximizing the expected improvement, using kernel density estimates of good and bad configurations. BOHB combines this with a multi-fidelity approach, which allows the acquisition function to operate across different budget levels, efficiently balancing exploration and exploitation while scaling to large optimization tasks.
**Reinforced Few-Shot Acquisition Function Learning for Bayesian Optimization** :cite:`HsiehHL21`
In this method, the acquisition function is modeled with a deep Q-network (DQN), learning to balance exploration and exploitation as a reinforcement learning task. The DQN predicts sampling utility based on the posterior mean and variance, refined by a Bayesian variant that incorporates uncertainty to avoid overfitting.
.. _alg-obj:
List of Algorithmic Objects
---------------------------
The optimization framework includes a variety of state-of-the-art algorithmic objects, each designed to address a different class of optimization problems. The table below summarizes the available objects, categorized by their type and the source algorithm from which they are derived.
.. csv-table::
:header: "Algorithmic Objects", "Type", "Source Algorithm"
:widths: 60, 10, 100
:file: algorithms.csv
References
----------
.. bibliography:: TOS.bib
:style: plain
================================================
FILE: docs/source/usage/cli.rst
================================================
.. _command_line_usage:
Command Line
===============================
TransOPT provides a command-line interface (CLI) that allows users to define and run optimization tasks directly from the terminal. This is facilitated by the `run_cli.py` script, which supports a wide range of customizable parameters.
Running the Command-Line Interface
----------------------------------
To run the `run_cli.py` script, navigate to the directory containing the script and use the following command:
.. code-block:: bash
python transopt/agent/run_cli.py [OPTIONS]
Where `[OPTIONS]` are the command-line arguments you can specify to customize the behavior of TransOPT.
I. Command-Line Arguments
^^^^^^^^^^^^^^^^^^^^^^^^^
Here is a list of the main command-line arguments supported by the script:
**Task Configuration**
- **`-n, --task_name`**: Name of the task (default: `"Sphere"`).
- **`-v, --num_vars`**: Number of variables (default: `2`).
- **`-o, --num_objs`**: Number of objectives (default: `1`).
- **`-f, --fidelity`**: Fidelity level of the task (default: `""`).
- **`-w, --workloads`**: Workloads associated with the task (default: `"0"`).
- **`-bt, --budget_type`**: Type of budget (e.g., `"Num_FEs"`) (default: `"Num_FEs"`).
- **`-b, --budget`**: Budget for the task, typically the number of function evaluations (default: `100`).
**Optimizer Configuration**
- **`-sr, --space_refiner`**: Space refiner method (default: `"None"`).
- **`-srp, --space_refiner_parameters`**: Parameters for the space refiner (default: `""`).
- **`-srd, --space_refiner_data_selector`**: Data selector for the space refiner (default: `"None"`).
- **`-srdp, --space_refiner_data_selector_parameters`**: Parameters for the data selector (default: `""`).
- **`-sp, --sampler`**: Sampling method (default: `"random"`).
- **`-spi, --sampler_init_num`**: Initial number of samples (default: `22`).
- **`-spp, --sampler_parameters`**: Parameters for the sampler (default: `""`).
- **`-spd, --sampler_data_selector`**: Data selector for the sampler (default: `"None"`).
- **`-spdp, --sampler_data_selector_parameters`**: Parameters for the sampler's data selector (default: `""`).
- **`-pt, --pre_train`**: Pretraining method (default: `"None"`).
- **`-ptp, --pre_train_parameters`**: Parameters for pretraining (default: `""`).
- **`-ptd, --pre_train_data_selector`**: Data selector for pretraining (default: `"None"`).
- **`-ptdp, --pre_train_data_selector_parameters`**: Parameters for the pretraining data selector (default: `""`).
- **`-m, --model`**: Model used for optimization (default: `"GP"`).
- **`-mp, --model_parameters`**: Parameters for the model (default: `""`).
- **`-md, --model_data_selector`**: Data selector for the model (default: `"None"`).
- **`-mdp, --model_data_selector_parameters`**: Parameters for the model's data selector (default: `""`).
- **`-acf, --acquisition_function`**: Acquisition function used (default: `"EI"`).
- **`-acfp, --acquisition_function_parameters`**: Parameters for the acquisition function (default: `""`).
- **`-acfd, --acquisition_function_data_selector`**: Data selector for the acquisition function (default: `"None"`).
- **`-acfdp, --acquisition_function_data_selector_parameters`**: Parameters for the acquisition function's data selector (default: `""`).
- **`-norm, --normalizer`**: Normalization method (default: `"Standard"`).
- **`-normp, --normalizer_parameters`**: Parameters for the normalizer (default: `""`).
- **`-normd, --normalizer_data_selector`**: Data selector for the normalizer (default: `"None"`).
- **`-normdp, --normalizer_data_selector_parameters`**: Parameters for the normalizer's data selector (default: `""`).
**General Configuration**
- **`-s, --seeds`**: Random seed for reproducibility (default: `0`).
II. Example Usage
^^^^^^^^^^^^^^^^^
Below are some example commands demonstrating how to use the CLI to run different tasks with varying configurations.
**Example 1: Running a basic task with default parameters**
.. code-block:: bash
python transopt/agent/run_cli.py -n MyTask -v 3 -o 1 -b 200
**Example 2: Running a task with a specific model and acquisition function**
.. code-block:: bash
python transopt/agent/run_cli.py -n MyTask -v 3 -o 2 -m RF -acf UCB -b 300
**Example 3: Using custom parameters for the space refiner and sampler**
.. code-block:: bash
python transopt/agent/run_cli.py -n MyTask -sr "Prune" -sp "lhs" -spi 30 -b 300
III. Additional Notes
^^^^^^^^^^^^^^^^^^^^^
- The **random seed** is particularly important for ensuring that the results are reproducible. Make sure to specify the `--seeds` option if you want to run experiments that can be exactly replicated.
- TransOPT's CLI is highly flexible, allowing you to tailor the optimization process to your specific needs by adjusting the parameters and options provided.
By following the instructions above, you can effectively use the TransOPT CLI to run and manage your optimization tasks.
================================================
FILE: docs/source/usage/data_manage.rst
================================================
Data Management
===============
The `datamanager` module is designed to manage data generated during optimization tasks. It provides a structured approach for storing, querying, and transferring data across different optimization scenarios. This module is built with flexibility in mind, enabling efficient management of various optimization task requirements, from persistent storage to similarity-based searches.
The primary roles of the `datamanager` include:
- **Data Storage**: Utilizes a database-backed storage system to persist data generated throughout the optimization process, ensuring that configurations, results, and metadata are readily accessible.
- **Checkpointing**: Allows for saving the state of optimization tasks at specific points, facilitating recovery and continuation from those points in case of interruptions.
- **Flexible Search Mechanisms**: Incorporates both metadata-based searches for filtering and querying task information and Locality-Sensitive Hashing (LSH) for identifying similar data points within large datasets.
This combination of features makes the `datamanager` particularly valuable in scenarios where optimization processes are iterative and data-intensive, requiring a balance between precise control over individual task data and the ability to draw insights from historical records.
Data Storage and Management
---------------------------
The core of the `datamanager` module lies in its robust data storage and management capabilities, allowing it to handle the varied data generated during optimization tasks efficiently. It utilizes an SQLite database to persistently store task-related data, configurations, and metadata, ensuring that important information is retained across task iterations.
Data Storage Mechanism
**********************
The `datamanager` uses SQLite as a backend for storing data, chosen for its lightweight and self-contained nature, making it suitable for scenarios where a full-fledged database system may be unnecessary. It supports various data formats for easy integration:
- **Dictionary**: Individual task data can be stored as dictionaries, with keys representing field names and values representing corresponding data.
- **List**: Supports lists of dictionaries or lists for batch insertion, allowing multiple records to be added in a single operation.
- **DataFrame/NumPy Arrays**: For more complex data structures, `datamanager` can insert rows from pandas DataFrames or NumPy arrays, automatically converting these into a database-compatible format.
This flexibility allows the `datamanager` to adapt to different data needs and store both simple and complex optimization data structures seamlessly.
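How such heterogeneous inputs might be normalized into SQLite rows can be sketched with the standard library (a simplified illustration; `insert_records` is a hypothetical helper, not the datamanager's actual API):

```python
import sqlite3

def insert_records(conn, table, records):
    """Accept a dict or a list of dicts and insert them as rows.

    Table and column names come from trusted calling code; the values
    themselves are bound as SQL parameters.
    """
    if isinstance(records, dict):          # single record -> batch of one
        records = [records]
    cols = list(records[0].keys())
    conn.execute(f"CREATE TABLE IF NOT EXISTS {table} ({', '.join(cols)})")
    placeholders = ", ".join("?" for _ in cols)
    conn.executemany(
        f"INSERT INTO {table} ({', '.join(cols)}) VALUES ({placeholders})",
        [tuple(r[c] for c in cols) for r in records],
    )

conn = sqlite3.connect(":memory:")
insert_records(conn, "results", {"x0": 0.1, "x1": 0.7, "objective": 1.3})
insert_records(conn, "results", [
    {"x0": 0.4, "x1": 0.2, "objective": 0.9},
    {"x0": 0.8, "x1": 0.5, "objective": 2.1},
])
rows = conn.execute("SELECT COUNT(*) FROM results").fetchone()
# rows[0] == 3
```

DataFrame and NumPy inputs reduce to the same shape: each row becomes one tuple in the ``executemany`` batch.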
Metadata Table Design
*********************
A key feature of the `datamanager` is its use of metadata tables to store descriptive information about each optimization task, enabling more efficient querying and organization of data. The `_metadata` table is specifically designed to record detailed information about each stored optimization task, such as:
- **table_name**: The name of the table where task data is stored, serving as the primary key.
- **problem_name**: The name of the optimization problem, providing context for the stored task.
- **dimensions**: The number of dimensions involved in the optimization problem.
- **objectives**: The number of objectives being optimized.
- **fidelities**: A textual representation of various fidelities or resolution levels within the task.
- **budget_type** and **budget**: Information about the type and limits of the budget used in the optimization.
- **space_refiner**, **sampler**, **model**, **pretrain**, **acf**, **normalizer**: Parameters related to the methodologies and models used in the optimization process.
- **dataset_selectors**: A JSON-encoded structure representing the criteria for dataset selection.
The `_metadata` table serves as a centralized index of all stored tasks, allowing users to quickly retrieve and filter tasks based on their descriptive attributes.
Data Storage Flexibility
************************
The design of the `datamanager` ensures that data storage remains flexible and adaptable. By supporting multiple input formats and allowing for dynamic database interactions, the module can handle diverse data requirements across different optimization tasks. This adaptability is crucial for optimization scenarios where the nature of the data may change based on the problem domain or the phase of the optimization process.
Through its use of a lightweight SQLite database, combined with a rich set of metadata, the `datamanager` provides a powerful yet simple way to manage and organize the data generated during optimization, laying the groundwork for more complex functionalities such as checkpointing and similarity-based searches.
Similarity Search and Data Reuse
--------------------------------
The `datamanager` module includes advanced functionality for identifying similar data points and facilitating data reuse between optimization tasks with similar characteristics. This is particularly valuable in scenarios where insights from past optimization runs can be leveraged to accelerate new tasks, reducing the time and computational effort needed to reach optimal solutions.
Similarity Search Mechanisms
****************************
The `datamanager` uses a combination of Locality-Sensitive Hashing (LSH) and MinHash techniques to perform similarity searches across stored optimization data. These techniques allow for efficient identification of data points that are similar but not identical, supporting exploratory optimization where near-optimal solutions can inform new tasks.
- **Locality-Sensitive Hashing (LSH)**: LSH is employed to map similar data points into the same hash buckets with high probability. This approach reduces the dimensionality of the data and enables fast similarity searches by grouping data that are likely to be similar into the same buckets.
- **MinHash Signatures**: MinHash is used to generate compact signatures for high-dimensional data (such as configuration states or text data). These signatures make it possible to estimate the similarity between data points by comparing their hashed values, which approximates the Jaccard similarity between sets.
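A minimal MinHash sketch illustrates the idea (the datamanager's internals may differ; `minhash_signature` and the seeded-hash construction are choices made here):

```python
import hashlib

def minhash_signature(items, num_hashes=128):
    """One minimum hash value per seeded hash function (illustrative)."""
    sig = []
    for seed in range(num_hashes):
        sig.append(min(
            int.from_bytes(
                hashlib.blake2b(f"{seed}:{item}".encode(), digest_size=8).digest(),
                "big",
            )
            for item in items
        ))
    return sig

def estimated_jaccard(sig_a, sig_b):
    """Fraction of matching signature slots approximates Jaccard similarity."""
    return sum(a == b for a, b in zip(sig_a, sig_b)) / len(sig_a)

# Two task descriptions sharing 2 of 4 distinct attributes (true Jaccard 0.5).
a = {"dim=2", "objs=1", "name=sphere"}
b = {"dim=2", "objs=1", "name=rastrigin"}
est = estimated_jaccard(minhash_signature(a), minhash_signature(b))
# est should be close to the true Jaccard similarity of 2/4 = 0.5
```

Because signatures are short and fixed-length, similarity can be estimated without ever comparing the full sets.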
Integration with the Manager for Task Similarity
************************************************
Within the `manager`, the LSH mechanism is used to identify optimization tasks that share similar characteristics. This process involves creating a concise representation, or "vector," of each task based on key properties like the number of variables, number of objectives, and descriptive information about the task.
- **Vector Representation of Tasks**: To enable comparison, each optimization task is represented by a vector that summarizes important aspects of the task, such as:
- The complexity of the problem (e.g., number of variables and objectives).
- Descriptive details about the variables, which can capture the nature of the problem.
- The task's name or type, helping categorize similar tasks.
This vector encapsulates the essential details of each optimization task, providing a structured way to describe the nature of the problem. By converting tasks into such standardized vectors, it becomes possible to use LSH to efficiently identify similar tasks.
- **Similarity Search Process**: The LSH mechanism uses these vectors to map tasks into clusters or hash buckets, where tasks with similar vectors are more likely to be grouped together. When a new task or query vector is introduced, the `manager` can quickly identify past tasks that fall into the same bucket, indicating similarity based on the problem structure and configuration.
This allows the `datamanager` to efficiently locate tasks with similar characteristics, enabling reuse of past results or configurations that may be relevant to the new task.
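The bucketing step can be sketched with toy, pre-computed signatures (illustrative; the band count and signature contents are arbitrary here, and the datamanager's internals may differ):

```python
def lsh_buckets(signatures, bands=2):
    """Hash each band of each signature; equal bands share a bucket."""
    rows = len(next(iter(signatures.values()))) // bands
    buckets = {}
    for task, sig in signatures.items():
        for b in range(bands):
            key = (b, tuple(sig[b * rows:(b + 1) * rows]))
            buckets.setdefault(key, set()).add(task)
    return buckets

def candidates(task, buckets):
    """Tasks colliding with `task` in any bucket are candidate matches."""
    found = set()
    for members in buckets.values():
        if task in members:
            found |= members
    found.discard(task)
    return found

signatures = {
    "task_a": [3, 7, 7, 1, 9, 2, 4, 4],
    "task_b": [3, 7, 7, 1, 5, 2, 0, 4],   # shares its first band with task_a
    "task_c": [8, 0, 6, 2, 1, 5, 3, 9],
}
buckets = lsh_buckets(signatures)
# task_a and task_b collide in one bucket; task_c collides with nothing
```

Only colliding tasks need a full similarity check, which is what makes the lookup fast on large histories.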
Applications of Similarity-Based Data Reuse
*******************************************
The similarity search and data reuse capabilities of the `datamanager` provide significant advantages in various optimization scenarios:
- **Warm Start for Optimization**: By starting a new task with configurations similar to those that were effective in past tasks, users can perform "warm starts" that speed up convergence.
- **Adaptive Optimization**: For tasks that require adjusting optimization parameters over time, the ability to find and utilize past similar configurations ensures that the adjustments are more efficient.
- **Transfer Learning in Optimization**: When optimization tasks vary slightly across iterations (e.g., changing problem parameters or objectives), the `datamanager` helps carry over useful information from previous runs, acting as a form of transfer learning in optimization.
The integration of vector-based representations with LSH makes the `datamanager` a powerful tool for scenarios where similarity and reuse are critical. It enables users to not only store and manage data but also to leverage historical results for more efficient optimization processes.
Flexible Data Querying
----------------------
The `datamanager` module is equipped with versatile data querying capabilities, allowing users to perform both precise and approximate searches based on the needs of their optimization tasks. This flexibility ensures that data can be accessed efficiently, whether the goal is to find an exact match or to identify records that satisfy broader criteria.
Metadata-Based Search
*********************
A key feature of the `datamanager` is its ability to perform metadata-based searches. This type of search leverages the detailed metadata stored about each optimization task, enabling users to filter and retrieve data based on descriptive attributes such as task name, problem type, and configuration settings.
- **Dynamic Query Generation**: The `datamanager` constructs SQL `WHERE` clauses dynamically based on user-provided search criteria, allowing for flexible and complex queries. Users can specify one or multiple metadata fields as search parameters, and the module generates the appropriate SQL query to retrieve matching records.
- **Example**: Using the `search_tables_by_metadata` method, users can search for optimization tasks based on criteria such as `problem_name`, `budget_type`, or other attributes stored in the `_metadata` table:
.. code-block:: python
search_params = {
"problem_name": "example_problem",
"budget_type": "fixed"
}
matching_tables = datamanager.search_tables_by_metadata(search_params)
This example retrieves the names of all tables associated with optimization tasks that match the given `problem_name` and `budget_type`, making it easier to filter data relevant to specific problem settings.
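One plausible way such a method could build its query is with a parameterized ``WHERE`` clause (a hedged sketch; the actual implementation of `search_tables_by_metadata` may differ):

```python
import sqlite3

def search_metadata(conn, criteria):
    """Build 'field = ?' conditions from a criteria dict.

    Field names come from trusted calling code; values are bound as
    SQL parameters, never interpolated into the query string.
    """
    where = " AND ".join(f"{field} = ?" for field in criteria)
    sql = f"SELECT table_name FROM _metadata WHERE {where}"
    return [row[0] for row in conn.execute(sql, tuple(criteria.values()))]

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE _metadata (table_name TEXT PRIMARY KEY, "
             "problem_name TEXT, budget_type TEXT)")
conn.executemany(
    "INSERT INTO _metadata VALUES (?, ?, ?)",
    [("t1", "example_problem", "fixed"),
     ("t2", "example_problem", "adaptive"),
     ("t3", "other_problem", "fixed")],
)
tables = search_metadata(conn, {"problem_name": "example_problem",
                                "budget_type": "fixed"})
# tables == ["t1"]
```

Adding or removing criteria changes only the dict, not the query-building code, which is what makes this style of search flexible.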
Precise Search and Filtering
****************************
While metadata-based search provides flexibility, the `datamanager` also supports precise search and filtering operations. These are particularly useful when specific task data needs to be retrieved without approximation:
- **Primary Key and Index-Based Search**: For tasks or configurations that are uniquely identified by fields like `table_name` or `task_id`, the `datamanager` uses indexed fields for fast lookups, ensuring that searches for individual records are highly efficient.
- **SQL Query Support**: Users can execute custom SQL queries to interact directly with the database, giving them complete control over the data retrieval process. This is useful when complex joins, aggregations, or advanced filtering conditions are required.
- **Example**: A precise search can be performed to retrieve data entries associated with a specific task ID:
.. code-block:: python
query = "SELECT * FROM _metadata WHERE table_name = 'task_example'"
task_data = datamanager.execute(query, fetchall=True)
This example demonstrates how to execute a custom SQL query to retrieve all metadata associated with a specific task table.
Combining Flexible and Precise Queries
**************************************
The true strength of the `datamanager` lies in its ability to combine flexible metadata-based search with precise data retrieval. This enables users to start with broad criteria to identify relevant tasks and then drill down into specific records for deeper analysis.
- **Hybrid Search Strategies**: Users can first use metadata-based queries to identify relevant tasks and then apply precise searches to extract detailed data from those tasks.
- **Example Workflow**:
1. Use `search_tables_by_metadata` to find all tasks that match certain criteria (e.g., problem type).
2. Iterate over the results and use SQL queries to retrieve detailed data from each identified task.
.. code-block:: python
search_params = {"problem_name": "example_problem"}
tables = datamanager.search_tables_by_metadata(search_params)
for table in tables:
data = datamanager.execute(f"SELECT * FROM {table} WHERE objectives = 2", fetchall=True)
This workflow allows users to identify relevant optimization tasks and then extract specific entries from each, providing a balance between breadth and depth in data retrieval.
The flexibility of the `datamanager`'s querying capabilities ensures that users can adapt their data retrieval strategies to their specific needs, whether they require a broad overview of multiple tasks or a detailed analysis of a single task. This makes it an indispensable tool for managing data in complex optimization environments.
Integration with Optimization Tasks
-----------------------------------
The `datamanager` module is designed to integrate smoothly with optimization task workflows, providing an interface for storing, retrieving, and managing data throughout the lifecycle of these tasks. It acts as the central repository for task configurations, results, and intermediate data, enabling easy access and modification during different stages of optimization.
Task Data Flow Management
*************************
The `datamanager` plays a crucial role in managing the flow of data during the execution of optimization tasks. It ensures that data is stored in a structured manner, facilitating efficient access and modification throughout the optimization process. Key aspects include:
- **Storing Initial Configurations**: When an optimization task starts, the initial configurations and parameters can be stored in the `datamanager`, creating a record of the starting point.
- **Recording Intermediate Results**: As the optimization progresses, intermediate results and states can be stored, allowing users to analyze the trajectory of the process.
- **Saving Final Outcomes**: Once the optimization task concludes, the final configurations and results are saved, creating a comprehensive record of the optimization process.
This structured approach to data storage ensures that all phases of the optimization process are properly recorded, making it easy to track changes and analyze outcomes.
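The three phases above can be sketched with plain `sqlite3`; the `task_log` table and `record` helper here are illustrative stand-ins for this sketch, not the `datamanager`'s actual schema or API.

```python
import json
import sqlite3

# Illustrative lifecycle log: one row per recorded phase of a task.
conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE task_log (task TEXT, phase TEXT, payload TEXT)")

def record(task, phase, payload):
    """Store one snapshot of the task state as JSON."""
    conn.execute(
        "INSERT INTO task_log VALUES (?, ?, ?)",
        (task, phase, json.dumps(payload)),
    )
    conn.commit()

# 1. Initial configuration at task start.
record("sphere_opt", "initial", {"x0": [0.5, -1.2], "budget": 100})
# 2. Intermediate results as the optimization progresses.
record("sphere_opt", "intermediate", {"iter": 10, "best": 0.73})
# 3. Final outcome when the task concludes.
record("sphere_opt", "final", {"best_config": [0.01, 0.02], "best": 0.0005})

phases = [row[0] for row in conn.execute(
    "SELECT phase FROM task_log WHERE task = 'sphere_opt' ORDER BY rowid")]
print(phases)  # ['initial', 'intermediate', 'final']
```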
Interaction with Optimization Algorithms
****************************************
The `datamanager` acts as a data backend that stores and retrieves task-related data as needed, allowing optimization algorithms to make data-driven decisions:
- **Configuration Retrieval**: Optimization algorithms can query the `datamanager` to retrieve specific configurations or parameters that were effective in past tasks.
- **Logging Adjustments**: As algorithms adjust parameters or explore new solutions, they can store updated configurations, ensuring that each adjustment is logged.
These interactions ensure that the optimization process is well-documented and that historical data can be leveraged effectively, enhancing the decision-making process of optimization algorithms.
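As a concrete illustration of configuration retrieval, a warm-start query might look as follows; the `history` table and its column names are assumptions made for this sketch, not the `datamanager`'s real schema.

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE history (x1 REAL, x2 REAL, objective REAL)")
conn.executemany(
    "INSERT INTO history VALUES (?, ?, ?)",
    [(0.9, 0.8, 1.45), (0.1, 0.2, 0.05), (0.3, -0.1, 0.10)],
)

# Retrieve the k best past configurations to seed a new task.
k = 2
warm_starts = conn.execute(
    "SELECT x1, x2 FROM history ORDER BY objective ASC LIMIT ?", (k,)
).fetchall()
print(warm_starts)  # [(0.1, 0.2), (0.3, -0.1)]
```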
Design Considerations
---------------------
The design of the `datamanager` module is driven by the need to balance flexibility, performance, and scalability in handling optimization task data. This section outlines the key design considerations that guided the development of the module, ensuring that it meets the diverse needs of users working with complex optimization problems.
Modularity and Extensibility
****************************
The `datamanager` is designed with a modular architecture, where each major function—such as data storage, similarity search, and checkpointing—is encapsulated in a dedicated component. This modular design offers several advantages:
- **Separation of Concerns**: Each component handles a specific aspect of data management, allowing users to focus on individual functionalities without being overwhelmed by the entire system.
- **Ease of Maintenance**: With separate modules for each function, updates and bug fixes can be applied to specific areas without affecting the rest of the system, making maintenance simpler.
- **Extensibility**: New features or modifications can be added without disrupting the existing functionality. For example, adding support for a different database backend or a new similarity search technique can be achieved by extending the relevant module without needing to overhaul the entire system.
Performance Optimization
************************
Given the data-intensive nature of optimization tasks, performance was a critical consideration in the design of the `datamanager`. Several strategies were employed to ensure that the module can handle large datasets and complex queries efficiently:
- **Indexed Storage**: The use of indexes on key fields in the SQLite database—such as `table_name` and `problem_name`—ensures that searches and data retrievals are fast, even when the database grows in size.
- **Batch Data Insertion**: When storing large amounts of data, the `datamanager` supports batch insertion, reducing the overhead associated with frequent database writes. This approach minimizes transaction times and optimizes the overall data storage process.
- **Efficient Similarity Computation**: By using Locality-Sensitive Hashing (LSH) and MinHash, the module avoids the computational cost of pairwise comparisons in high-dimensional space, making similarity-based searches scalable even for large datasets.
These performance optimizations ensure that the `datamanager` remains responsive and efficient, providing a smooth experience for users dealing with large-scale optimization tasks.
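The idea behind the MinHash shortcut can be demonstrated with a small pure-Python sketch (the module's actual implementation may differ): the fraction of positions where two signatures' minima agree approximates the Jaccard similarity of the underlying sets, without any pairwise element comparison.

```python
import hashlib

def minhash_signature(tokens, num_perm=64):
    """One minimum hash value per seeded hash (a stand-in for a permutation)."""
    return [
        min(int(hashlib.md5(f"{seed}:{t}".encode()).hexdigest(), 16) for t in tokens)
        for seed in range(num_perm)
    ]

def estimated_jaccard(sig_a, sig_b):
    """Fraction of signature positions where the minima agree."""
    return sum(a == b for a, b in zip(sig_a, sig_b)) / len(sig_a)

a = {"sphere", "2d", "continuous", "minimize"}
b = {"sphere", "3d", "continuous", "minimize"}
sig_a = minhash_signature(a)
sig_b = minhash_signature(b)
true_j = len(a & b) / len(a | b)  # 3/5 = 0.6
print(round(true_j, 2), round(estimated_jaccard(sig_a, sig_b), 2))
```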
Scalability and Data Volume Management
**************************************
As the `datamanager` is intended for use with potentially large datasets generated by optimization tasks, scalability was a key focus during its design:
- **Scalable Database Design**: The choice of SQLite as the initial database backend provides a lightweight and self-contained solution that is sufficient for many use cases. However, the design is abstract enough to allow for future migration to more powerful database systems like PostgreSQL if the need for greater scalability arises.
- **Incremental Data Storage**: The ability to store data incrementally during optimization tasks ensures that the database grows in a controlled manner, preventing sudden spikes in storage requirements. This is especially important for long-running tasks that generate large volumes of data over time.
- **Support for Data Archiving**: To prevent the database from becoming too large and unwieldy, the `datamanager` supports archiving of old or completed task data. This ensures that the active dataset remains manageable, while older records can be preserved externally if needed for historical analysis.
These design choices make the `datamanager` suitable for both small-scale experimentation and larger, more data-intensive optimization environments, adapting to the needs of different users.
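With SQLite as the backend, archiving of the kind described above can rely on `ATTACH DATABASE`; the table names in this sketch are illustrative, not the `datamanager`'s real layout.

```python
import os
import sqlite3
import tempfile

archive_path = os.path.join(tempfile.mkdtemp(), "archive.db")

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE finished_task (x REAL, y REAL)")
conn.execute("INSERT INTO finished_task VALUES (1.0, 2.0)")
conn.commit()  # ATTACH must not run inside an open transaction

# Move a completed task's data into a separate archive database,
# then drop it from the active database.
conn.execute("ATTACH DATABASE ? AS archive", (archive_path,))
conn.execute("CREATE TABLE archive.finished_task AS SELECT * FROM main.finished_task")
conn.execute("DROP TABLE main.finished_task")
conn.execute("DETACH DATABASE archive")

active = [r[0] for r in conn.execute(
    "SELECT name FROM sqlite_master WHERE type = 'table'")]
archived = sqlite3.connect(archive_path).execute(
    "SELECT * FROM finished_task").fetchall()
print(active, archived)  # [] [(1.0, 2.0)]
```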
Flexibility in Data Formats and Querying
****************************************
Flexibility is a hallmark of the `datamanager`'s design, ensuring that it can adapt to various data types and query requirements across different optimization problems:
- **Support for Multiple Data Formats**: The module supports the insertion and retrieval of data in various formats, including dictionaries, lists, pandas DataFrames, and NumPy arrays. This flexibility allows users to work with their preferred data structures without needing to conform to a rigid format.
- **Customizable Search and Query Mechanisms**: Users have the freedom to perform custom SQL queries directly on the database, allowing for advanced data manipulation and retrieval. This is especially useful when standard search methods do not meet specific analysis requirements.
- **Adaptable Metadata Design**: The `_metadata` table is designed to be extensible, allowing users to add new fields that are relevant to their particular optimization tasks. This ensures that the metadata stored for each task can evolve alongside the changing needs of the optimization process.
These aspects of flexibility make the `datamanager` an adaptable tool, suitable for a wide range of use cases and optimization frameworks.
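Accepting several input formats usually reduces to normalizing them into a common row form before insertion. A stdlib-only sketch of that idea (the actual module additionally handles pandas DataFrames and NumPy arrays):

```python
def to_rows(data, columns):
    """Normalize dict-of-lists or list-of-dicts to a list of row tuples."""
    if isinstance(data, dict):                      # {"x": [...], "y": [...]}
        return list(zip(*(data[c] for c in columns)))
    if data and isinstance(data[0], dict):          # [{"x": ..., "y": ...}, ...]
        return [tuple(row[c] for c in columns) for row in data]
    return [tuple(row) for row in data]             # already row-like

cols = ["x", "y"]
print(to_rows({"x": [1, 2], "y": [3, 4]}, cols))            # [(1, 3), (2, 4)]
print(to_rows([{"x": 1, "y": 3}, {"x": 2, "y": 4}], cols))  # [(1, 3), (2, 4)]
```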
Reliability and Data Integrity
******************************
Ensuring data integrity and reliability is critical when managing optimization task data. The `datamanager` includes several mechanisms to safeguard against data loss and ensure consistency:
- **Atomic Transactions**: The use of atomic transactions in database operations ensures that data is written consistently, reducing the risk of data corruption during inserts, updates, or deletions.
- **Checkpointing for Data Safety**: The checkpointing feature serves as a safeguard, allowing users to restore a task to a previous state if issues occur, ensuring that progress is not permanently lost.
- **Data Validation**: Basic validation checks are performed before data is inserted into the database, ensuring that required fields are present and data types are correct. This prevents invalid data from entering the system and causing errors during later stages of optimization.
With these mechanisms in place, the `datamanager` is designed to maintain the reliability and integrity of the data it manages, ensuring that users can trust it as a robust data management solution for their optimization tasks.
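The interplay of atomic transactions and validation can be seen in a small `sqlite3` sketch: using the connection as a context manager wraps the inserts in a single transaction, so a validation failure mid-batch rolls back every row that was already written.

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE results (x REAL, y REAL)")

rows = [(0.1, 1.0), (0.2, "bad"), (0.3, 3.0)]  # second row fails validation

try:
    with conn:  # one atomic transaction: commit on success, rollback on error
        for x, y in rows:
            if not isinstance(y, float):
                raise ValueError("invalid objective value")
            conn.execute("INSERT INTO results VALUES (?, ?)", (x, y))
except ValueError:
    pass

count = conn.execute("SELECT COUNT(*) FROM results").fetchone()[0]
print(count)  # 0: the partial insert was rolled back
```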
The design considerations of the `datamanager` reflect a balance between flexibility, performance, and reliability, making it a well-rounded choice for managing the complex and evolving data needs of optimization tasks. Its modular structure, scalability, and focus on data integrity ensure that it can adapt to different challenges and provide consistent value in optimization scenarios.
================================================
FILE: docs/source/usage/problems.rst
================================================
Benchmark Problems
==================
This page describes the benchmark problems bundled with TransOPT and how to register your own.
.. admonition:: Overview
   :class: info

   - :ref:`Register <registering-new-problem>`: How to register a new optimization problem in TransOPT
   - :ref:`Synthetic Problem <synthetic-problems>`: The list of synthetic problems available in TransOPT
   - :ref:`Hyperparameter Optimization Problem <hpo-problems>`: The list of HPO problems available in TransOPT
   - Configurable Software Optimization Problem: The list of configurable software optimization problems available in TransOPT
   - RNA Inverse Design Problem: The list of RNA inverse design problems available in TransOPT
   - Protein Inverse Folding Problem: The list of protein inverse folding problems available in TransOPT
   - Parallelization: How to parallelize function evaluations
.. _registering-new-problem:
Registering a New Benchmark Problem
-----------------------------------
To register a new benchmark problem in the TransOPT framework, follow the steps below.
I. Import the Problem Registry
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
First, you need to import the `problem_registry` from the `transopt.agent.registry` module:
.. code-block:: python

   from transopt.agent.registry import problem_registry
II. Define a New Problem Class
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Next, define a new problem class. This class should be decorated with the `@problem_registry.register("ProblemName")` decorator, where `"ProblemName"` is the unique identifier for the problem. The new problem class must inherit from one of the following base classes:
- `NonTabularProblem`
- `TabularProblem`
For example, to create a new problem named "new_problem", you would define the class as follows:
.. code-block:: python

   @problem_registry.register("new_problem")
   class new_problem(NonTabularProblem):
       pass  # Further implementation required
III. Implement Required Methods
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
After defining the class, you need to implement the following three abstract methods:
1. **get_configuration_space**:

   This method defines the configuration space of the new problem.

   .. code-block:: python

      def get_configuration_space(self):
          # Define and return the configuration space
          pass
2. **get_fidelity_space**:

   This method defines the fidelity space for the problem, if applicable.

   .. code-block:: python

      def get_fidelity_space(self):
          # Define and return the fidelity space
          pass
3. **objective_function**:

   This method evaluates the problem's objective function for a given configuration and, optionally, a fidelity and a seed.

   .. code-block:: python

      def objective_function(self, configuration, fidelity=None, seed=None, **kwargs) -> Dict:
          # Evaluate the configuration and return the results as a dictionary
          pass
Here’s an example outline of the `sphere` class:
.. code-block:: python

   import numpy as np

   # NonTabularProblem, Continuous, SearchSpace, and FidelitySpace are
   # provided by TransOPT's problem base modules.
   @problem_registry.register("sphere")
   class sphere(NonTabularProblem):
       def get_configuration_space(self):
           # One continuous variable per input dimension
           variables = [Continuous(f'x{i}', (-5.12, 5.12)) for i in range(self.input_dim)]
           return SearchSpace(variables)

       def get_fidelity_space(self) -> FidelitySpace:
           # The sphere function has no fidelity dimension
           return FidelitySpace([])

       def objective_function(self, configuration, fidelity=None, seed=None, **kwargs) -> Dict:
           # Evaluate the configuration and return the result as a dictionary
           X = np.array([list(configuration.values())])
           y = np.sum(X ** 2, axis=1)
           return {'function_value': float(y[0])}
By following these steps, you can successfully register a new benchmark problem in the TransOPT framework.
.. _synthetic-problems:
Synthetic Problem
------------------
The synthetic problems in this section are widely used in the optimization literature for benchmarking optimization algorithms. These problems exhibit diverse characteristics and levels of complexity, making them ideal for testing the robustness and efficiency of different optimization strategies. Below is an overview of the synthetic problems included in this benchmark suite:
- **Sphere:** A simple convex problem that is often used as a baseline. The global minimum is located at the origin, and the objective function value increases quadratically with distance from the origin.
- **Rastrigin:** A non-convex problem characterized by a large number of local minima, making it challenging for optimization algorithms to find the global minimum.
- **Schwefel:** Known for its complex landscape with many local minima, the Schwefel function requires optimization algorithms to balance exploration and exploitation effectively.
- **Ackley:** A multi-modal function with a nearly flat outer region and a large hole at the center, making it difficult for algorithms to escape local minima and converge to the global minimum.
- **Levy:** A multi-modal problem with a complex landscape that tests an algorithm's ability to handle irregularities and identify global optima.
- **Griewank:** A function with many widespread local minima, making it challenging to converge to the global optimum. It is often used to assess the ability of algorithms to avoid getting trapped in local minima.
- **Rosenbrock:** A non-convex problem with a narrow, curved valley that contains the global minimum. This function is commonly used to test the convergence properties of optimization algorithms.
- **Dropwave:** A challenging multi-modal function with steep drops, requiring careful search strategies to avoid local minima.
- **Langermann:** This problem has many local minima and a highly irregular structure, testing an algorithm's ability to explore complex search spaces.
- **Rotated Hyper-Ellipsoid:** A rotated version of the ellipsoid function, which tests an algorithm's capability to optimize problems with rotated and ill-conditioned landscapes.
- **Sum of Different Powers:** A problem where each term in the sum contributes differently to the overall objective, requiring optimization algorithms to handle varying sensitivities across dimensions.
- **Styblinski-Tang:** A function with multiple global minima, commonly used to test an algorithm's ability to avoid suboptimal solutions.
- **Powell:** A problem designed to challenge optimization algorithms with a mixture of convex and non-convex characteristics across different dimensions.
- **Dixon-Price:** This function has a smooth, narrow valley leading to the global minimum, testing an algorithm’s ability to navigate such features.
- **Ellipsoid:** A test problem that features high conditioning and elliptical level sets, requiring algorithms to efficiently search in skewed spaces.
- **Discus:** A variant of the sphere function with a large difference in scale between the first variable and the rest, making it a test of handling unbalanced scales.
- **BentCigar:** A highly anisotropic function where one direction has a much larger scale than the others, challenging algorithms to adjust their search strategies accordingly.
- **SharpRidge:** This function has a sharp ridge along one dimension, testing an algorithm's ability to optimize in narrow, high-gradient regions.
- **Katsuura:** A multi-fractal function that combines periodicity and complexity, testing the capability of algorithms to explore intricate landscapes.
- **Weierstrass:** A problem with a fractal structure, characterized by a large number of local minima and requiring algorithms to handle varying scales of roughness.
- **Different Powers:** A problem where each term contributes differently to the objective, challenging algorithms to manage varying sensitivities and scales.
- **Trid:** A function that has a curved and ridge-like structure, often used to assess the convergence properties of optimization algorithms.
- **LinearSlope:** A simple linear function with a varying slope across dimensions, used to test the basic exploration capabilities of optimization methods.
- **Elliptic:** Similar to the Ellipsoid function but with exponentially increasing scales, testing an algorithm’s ability to search efficiently in poorly conditioned spaces.
- **PERM:** A complex combinatorial problem that combines different power terms, testing an algorithm’s ability to handle permutation-based search spaces.
- **Power Sum:** A problem where each dimension contributes a power sum to the objective, requiring algorithms to handle large variations in sensitivity across variables.
- **Zakharov:** A problem with a complex, non-linear interaction between variables, used to test an algorithm’s ability to navigate multi-variable coupling.
- **Six-Hump Camel:** A low-dimensional, multi-modal problem with several local minima, requiring precise search strategies to find the global optimum.
- **Michalewicz:** A problem known for its challenging steepness and periodicity, making it difficult for algorithms to locate the global minimum.
- **Moving Peak:** A dynamic optimization problem where the objective function changes over time, used to assess an algorithm’s adaptability to changing landscapes.
These problems collectively provide a comprehensive suite for evaluating optimization algorithms across a broad range of difficulties, including convexity, multi-modality, separability, and conditioning.
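As a quick sanity check on the formulations tabulated below, the first few functions can be evaluated directly with NumPy; Sphere, Rastrigin, and Ackley all attain their global minimum of 0 at the origin.

```python
import numpy as np

def sphere(x):
    return float(np.sum(x ** 2))

def rastrigin(x):
    d = x.size
    return float(10 * d + np.sum(x ** 2 - 10 * np.cos(2 * np.pi * x)))

def ackley(x, a=20.0, b=0.2, c=2 * np.pi):
    d = x.size
    return float(
        -a * np.exp(-b * np.sqrt(np.sum(x ** 2) / d))
        - np.exp(np.sum(np.cos(c * x)) / d)
        + a + np.e
    )

origin = np.zeros(5)
print(sphere(origin), rastrigin(origin), round(ackley(origin), 10))
```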
+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------+-------------------------------+-----------------------------+
| Problem name | Mathematical formulation | Range | | |
+=========================+=======================================================================================================================================================================+==========================================+===============================+=============================+
| Sphere | :math:`f(\mathbf{x}) = \sum_{i=1}^d x_i^2` | :math:`x_i \in [-5.12, 5.12]` | | |
+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------+-------------------------------+-----------------------------+
| Rastrigin               | :math:`f(\mathbf{x}) = 10 d + \sum_{i=1}^d \left[ x_i^2 - 10 \cos(2 \pi x_i) \right]`                                                                | :math:`x_i \in [-5.12, 5.12]`            |                               |                             |
+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------+-------------------------------+-----------------------------+
| Schwefel | :math:`f(\mathbf{x}) = 418.9829 d - \sum_{i=1}^d x_i \sin\left(\sqrt{\left|x_i\right|}\right)` | :math:`x_i \in [-500, 500]` | | |
+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------+-------------------------------+-----------------------------+
| Ackley | :math:`f(\mathbf{x}) = -a \exp \left(-b \sqrt{\frac{1}{d} \sum_{i=1}^d x_i^2}\right)` | :math:`x_i \in [-32.768, 32.768]` | | |
| | :math:`-\exp \left(\frac{1}{d} \sum_{i=1}^d \cos \left(c x_i\right)\right) + a + \exp(1)` | | | |
+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------+-------------------------------+-----------------------------+
| Levy | :math:`f(\mathbf{x}) = \sin^2\left(\pi w_1\right) + \sum_{i=1}^{d-1}\left(w_i - 1\right)^2` | :math:`x_i \in [-10, 10]` | | |
| | :math:`\left[1 + 10 \sin^2\left(\pi w_i + 1\right)\right] + \left(w_d - 1\right)^2` | | | |
| | :math:`\left[1 + \sin^2\left(2 \pi w_d\right)\right], w_i = 1 + \frac{x_i - 1}{4}` | | | |
+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------+-------------------------------+-----------------------------+
| Griewank | :math:`f(\mathbf{x}) = \sum_{i=1}^d \frac{x_i^2}{4000} - \prod_{i=1}^d \cos\left(\frac{x_i}{\sqrt{i}}\right) + 1` | :math:`x_i \in [-600, 600]` | | |
+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------+-------------------------------+-----------------------------+
| Rosenbrock | :math:`f(\mathbf{x}) = \sum_{i=1}^{d-1}\left[100\left(x_{i+1} - x_i^2\right)^2 + \left(x_i - 1\right)^2\right]` | :math:`x_i \in [-5, 10]` | | |
+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------+-------------------------------+-----------------------------+
| Dropwave | :math:`f(\mathbf{x}) = -\frac{1 + \cos\left(12 \sqrt{x_1^2 + x_2^2}\right)}{0.5\left(x_1^2 + x_2^2\right) + 2}` | :math:`x_i \in [-5.12, 5.12]` | | |
+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------+-------------------------------+-----------------------------+
| Langermann | :math:`f(\mathbf{x}) = \sum_{i=1}^m c_i \exp\left(-\frac{1}{\pi} \sum_{j=1}^d \left(x_j - A_{ij}\right)^2\right)` | :math:`x_i \in [0, 10]` | | |
| | :math:`\cos\left(\pi \sum_{j=1}^d\left(x_j - A_{ij}\right)^2\right)` | | | |
+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------+-------------------------------+-----------------------------+
| Rotated Hyper-Ellipsoid | :math:`f(\mathbf{x}) = \sum_{i=1}^d \sum_{j=1}^i x_j^2` | :math:`x_i \in [-65.536, 65.536]` | | |
+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------+-------------------------------+-----------------------------+
| Sum of Different Powers | :math:`f(\mathbf{x}) = \sum_{i=1}^d \lvert x_i \rvert^{i+1}`                                                                                          | :math:`x_i \in [-1, 1]`                  |                               |                             |
+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------+-------------------------------+-----------------------------+
| Styblinski-Tang | :math:`f(\mathbf{x}) = \frac{1}{2} \sum_{i=1}^d\left(x_i^4 - 16 x_i^2 + 5 x_i\right)` | :math:`x_i \in [-5, 5]` | | |
+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------+-------------------------------+-----------------------------+
| Powell | :math:`f(\mathbf{x}) = \sum_{i=1}^{d/4}\left(x_{4i-3} + 10 x_{4i-2}\right)^2` | :math:`x_i \in [-4, 5]` | | |
| | :math:`+ 5\left(x_{4i-1} - x_{4i}\right)^2` | | | |
| | :math:`+ \left(x_{4i-2} - 2 x_{4i-1}\right)^4` | | | |
| | :math:`+ 10\left(x_{4i-3} - x_{4i}\right)^4` | | | |
+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------+-------------------------------+-----------------------------+
| Dixon-Price | :math:`f(\mathbf{x}) = \left(x_1 - 1\right)^2 + \sum_{i=2}^d i\left(2 x_i^2 - x_{i-1}\right)^2` | :math:`x_i \in [-10, 10]` | | |
+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------+-------------------------------+-----------------------------+
| Ellipsoid | :math:`f_2(\mathbf{x}) = \sum_{i=1}^D 10^{6 \frac{i-1}{D-1}} z_i^2 + f_{\mathrm{opt}}` | :math:`x_i \in [-5, 5]` | | |
+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------+-------------------------------+-----------------------------+
| Discus | :math:`f(\mathbf{x}) = 10^6 x_1^2 + \sum_{i=2}^D x_i^2` | :math:`x_i \in [-5, 5]` | | |
+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------+-------------------------------+-----------------------------+
| BentCigar | :math:`f(\mathbf{x}) = x_1^2 + 10^6 \sum_{i=2}^n x_i^2` | :math:`x_i \in [-5, 5]` | | |
+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------+-------------------------------+-----------------------------+
| SharpRidge | :math:`f(\mathbf{x}) = x_1^2 + 100 \sqrt{\sum_{i=2}^D x_i^2}` | :math:`x_i \in [-5, 5]` | | |
+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------+-------------------------------+-----------------------------+
| Katsuura | :math:`f(\mathbf{x}) = \frac{10}{D^2} \prod_{i=1}^D \left(1 + i \sum_{j=1}^{32} \frac{2^j x_i - \left[2^j x_i\right]}{2^j}\right)^{10 / D^{1.2}}` | :math:`x_i \in [-5, 5]` | | |
| | :math:`- \frac{10}{D^2} + f_{\mathrm{pen}}(\mathbf{x})` | | | |
+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------+-------------------------------+-----------------------------+
| Weierstrass | :math:`f_{16}(\mathbf{x}) = 10 \left(\frac{1}{D} \sum_{i=1}^D \sum_{k=0}^{11} \frac{1}{2^k} \cos \left(2 \pi 3^k\left(z_i + \frac{1}{2}\right)\right) - f_0\right)^3` | :math:`x_i \in [-5, 5]` | | |
| | :math:`+ \frac{10}{D} f_{\mathrm{pen}}(\mathbf{x})` | | | |
+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------+-------------------------------+-----------------------------+
| DifferentPowers | :math:`f(\mathbf{x}) = \sqrt{\sum_{i=1}^D x_i^{2 + 4 \frac{i-1}{D-1}}}` | :math:`x_i \in [-5, 5]` | | |
+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------+-------------------------------+-----------------------------+
| Trid | :math:`f(\mathbf{x}) = \sum_{i=1}^d \left(x_i - 1\right)^2 - \sum_{i=2}^d x_i x_{i-1}` | :math:`x_i \in [-d^2, d^2]` | | |
+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------+-------------------------------+-----------------------------+
| LinearSlope | :math:`f(\mathbf{x}) = \sum_{i=1}^D 5 s_i - s_i x_i` | :math:`x_i \in [-5, 5]` | | |
| | :math:`s_i = \operatorname{sign}\left(x_i^{\mathrm{opt}}\right) 10^{\frac{i-1}{D-1}},` | | | |
| | :math:`\text{for } i=1, \ldots, D` | | | |
+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------+-------------------------------+-----------------------------+
| Elliptic | :math:`f(\mathbf{x}) = \sum_{i=1}^D \left(10^6\right)^{\frac{i-1}{D-1}} x_i^2` | :math:`x_i \in [-5, 5]` | | |
+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------+-------------------------------+-----------------------------+
| PERM | :math:`f(\mathbf{x}) = \sum_{i=1}^d \left(\sum_{j=1}^d \left(j + \beta\right)\left(x_j^i - \frac{1}{j^i}\right)\right)^2` | :math:`x_i \in [-d, d]` | | |
+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------+-------------------------------+-----------------------------+
| Power Sum | :math:`f(\mathbf{x}) = \sum_{i=1}^d \left[\left(\sum_{j=1}^d x_j^i\right) - b_i\right]^2` | :math:`x_i \in [0, d]` | | |
+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------+-------------------------------+-----------------------------+
| Zakharov | :math:`f(\mathbf{x}) = \sum_{i=1}^d x_i^2 + \left(\sum_{i=1}^d 0.5 i x_i\right)^2` | :math:`x_i \in [-5, 10]` | | |
| | :math:`+ \left(\sum_{i=1}^d 0.5 i x_i\right)^4` | | | |
+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------+-------------------------------+-----------------------------+
| Six-Hump Camel | :math:`f(\mathbf{x}) = \left(4 - 2.1 x_1^2 + \frac{x_1^4}{3}\right) x_1^2 + x_1 x_2` | :math:`x_1 \in [-3, 3], x_2 \in [-2, 2]` | | |
| | :math:`+ \left(-4 + 4 x_2^2\right) x_2^2` | | | |
+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------+-------------------------------+-----------------------------+
| Michalewicz | :math:`f(\mathbf{x}) = -\sum_{i=1}^d \sin \left(x_i\right) \sin ^{2 m}\left(\frac{i x_i^2}{\pi}\right)` | :math:`x_i \in [0, \pi]` | | |
+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------+-------------------------------+-----------------------------+
| Moving Peak             | :math:`f(\mathbf{x}, t) = \max_{1 \le i \le m} \left[ H_i(t) - W_i(t) \lVert \mathbf{x} - \mathbf{p}_i(t) \rVert \right]`                             | :math:`x_i \in [0, 100]`                 |                               |                             |
+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------+-------------------------------+-----------------------------+
| PERM 2 | :math:`f(\mathbf{x}) = \sum_{i=1}^d\left(\sum_{j=1}^d\left(j^i+\beta\right)\left(\left(\frac{x_j}{j}\right)^i-1\right)\right)^2` | :math:`x_i \in [-d, d]` | | |
+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------------------------+-------------------------------+-----------------------------+
.. _hpo-problems:
Hyperparameter Optimization Problem
------------------------------------
This section provides an overview of the hyperparameter optimization problem, including the hyperparameters of various machine learning models and the machine learning tasks used to generate problem instances.
Hyperparameters for Support Vector Machine (SVM)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Support Vector Machines (SVM) are widely used for classification and regression tasks. They are particularly effective in high-dimensional spaces and situations where the number of dimensions exceeds the number of samples. The hyperparameters for SVM control the regularization and the kernel function, which are crucial for model performance.
+--------------------+-------------------+------------+
| **Hyperparameter** | **Range** | **Type** |
+====================+===================+============+
| C | :math:`[-10, 10]` | Continuous |
+--------------------+-------------------+------------+
| gamma | :math:`[-10, 10]` | Continuous |
+--------------------+-------------------+------------+
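For illustration, a search space like the SVM one above can be written down and sampled in a few lines of Python. This is only a sketch: the ``SVM_SPACE`` dict and ``sample_configuration`` helper are hypothetical names, not part of TransOPT's API.

```python
import random

# Illustrative description of the SVM search space from the table above.
# The [-10, 10] bounds are taken verbatim from the table; in many HPO
# setups such ranges are exponents on a log scale.
SVM_SPACE = {
    "C":     {"type": "continuous", "range": (-10.0, 10.0)},
    "gamma": {"type": "continuous", "range": (-10.0, 10.0)},
}

def sample_configuration(space, rng=random):
    """Draw one random configuration from a search-space description."""
    config = {}
    for name, spec in space.items():
        lo, hi = spec["range"]
        if spec["type"] == "continuous":
            config[name] = rng.uniform(lo, hi)
        elif spec["type"] == "integer":
            config[name] = rng.randint(int(lo), int(hi))
    return config

config = sample_configuration(SVM_SPACE)
```

The same dict layout extends naturally to the integer and categorical hyperparameters used by the other models in this section.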
Hyperparameters for AdaBoost
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AdaBoost is a popular ensemble method that combines multiple weak learners to create a strong classifier. It is particularly useful for boosting the performance of decision trees. The hyperparameters control the number of estimators and the learning rate, which affects the contribution of each classifier.
+--------------------+-------------------+------------+
| **Hyperparameter** | **Range** | **Type** |
+====================+===================+============+
| n_estimators | :math:`[1, 100]` | Integer |
+--------------------+-------------------+------------+
| learning_rate | :math:`[0.01, 1]` | Continuous |
+--------------------+-------------------+------------+
Hyperparameters for Random Forest
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Random Forest is an ensemble learning method that builds multiple decision trees and merges them to get a more accurate and stable prediction. It is widely used for both classification and regression tasks. The hyperparameters include the number of trees, the depth of the trees, and various criteria for splitting nodes.
+--------------------------+--------------------+-------------+
| **Hyperparameter** | **Range** | **Type** |
+==========================+====================+=============+
| n_estimators | :math:`[1, 1000]` | Integer |
+--------------------------+--------------------+-------------+
| max_depth | :math:`[1, 100]` | Integer |
+--------------------------+--------------------+-------------+
| criterion | {gini, entropy} | Categorical |
+--------------------------+--------------------+-------------+
| min_samples_leaf | :math:`[1, 20]` | Integer |
+--------------------------+--------------------+-------------+
| min_weight_fraction_leaf | :math:`[0.0, 0.5]` | Continuous |
+--------------------------+--------------------+-------------+
| min_impurity_decrease | :math:`[0.0, 1.0]` | Continuous |
+--------------------------+--------------------+-------------+
Hyperparameters for XGBoost
^^^^^^^^^^^^^^^^^^^^^^^^^^^
XGBoost is an efficient and scalable implementation of gradient boosting, designed for speed and performance. It is widely used in machine learning competitions and industry for classification and regression tasks. The hyperparameters include learning rates, tree depths, and regularization parameters, which control the complexity of the model and its ability to generalize.
+--------------------+-----------------------+------------+
| **Hyperparameter** | **Range** | **Type** |
+====================+=======================+============+
| eta | :math:`[-10.0, 0.0]` | Continuous |
+--------------------+-----------------------+------------+
| max_depth | :math:`[1, 15]` | Integer |
+--------------------+-----------------------+------------+
| min_child_weight | :math:`[0.0, 7.0]` | Continuous |
+--------------------+-----------------------+------------+
| colsample_bytree | :math:`[0.01, 1.0]` | Continuous |
+--------------------+-----------------------+------------+
| colsample_bylevel | :math:`[0.01, 1.0]` | Continuous |
+--------------------+-----------------------+------------+
| reg_lambda | :math:`[-10.0, 10.0]` | Continuous |
+--------------------+-----------------------+------------+
| reg_alpha | :math:`[-10.0, 10.0]` | Continuous |
+--------------------+-----------------------+------------+
| subsample_per_it | :math:`[0.1, 1.0]` | Continuous |
+--------------------+-----------------------+------------+
| n_estimators | :math:`[1, 50]` | Integer |
+--------------------+-----------------------+------------+
| gamma | :math:`[0.0, 1.0]` | Continuous |
+--------------------+-----------------------+------------+
Hyperparameters for GLMNet
^^^^^^^^^^^^^^^^^^^^^^^^^^
GLMNet is a regularized regression model that supports both LASSO and ridge regression. It is particularly useful for high-dimensional datasets where regularization is necessary to prevent overfitting. The hyperparameters control the strength of the regularization and the balance between L1 and L2 penalties.
+--------------------+---------------------------+-------------+
| **Hyperparameter** | **Range** | **Type** |
+====================+===========================+=============+
| lambda | :math:`[0, 10^5]` | Log-integer |
+--------------------+---------------------------+-------------+
| alpha | :math:`[0, 1]` | Continuous |
+--------------------+---------------------------+-------------+
| nlambda | :math:`[1, 100]` | Integer |
+--------------------+---------------------------+-------------+
Hyperparameters for AlexNet
^^^^^^^^^^^^^^^^^^^^^^^^^^^
AlexNet is a convolutional neural network (CNN) architecture that revolutionized the field of computer vision by achieving significant improvements on the ImageNet dataset. The hyperparameters include learning rate, dropout rate, weight decay, and the choice of activation function, all of which are crucial for training deep neural networks.
+---------------------+----------------------------+-------------+
| **Hyperparameter** | **Range** | **Type** |
+=====================+============================+=============+
| learning_rate | :math:`[10^{-5}, 10^{-1}]` | Continuous |
+---------------------+----------------------------+-------------+
| dropout_rate | :math:`[0.0, 0.5]` | Continuous |
+---------------------+----------------------------+-------------+
| weight_decay | :math:`[10^{-5}, 10^{-2}]` | Continuous |
+---------------------+----------------------------+-------------+
| activation_function | {ReLU, Leaky ReLU, ELU} | Categorical |
+---------------------+----------------------------+-------------+
Hyperparameters for 2-Layer Bayesian Neural Network (BNN)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Bayesian Neural Networks (BNNs) provide a probabilistic interpretation of deep learning models by introducing uncertainty in the weights. This allows BNNs to express model uncertainty, which is crucial for tasks where uncertainty quantification is important. The hyperparameters include layer sizes, step length, burn-in period, and momentum decay.
+--------------------+----------------------------+----------------+
| **Hyperparameter** | **Range** | **Type** |
+====================+============================+================+
| layer 1 | :math:`[2^4, 2^9]` | Log-integer |
+--------------------+----------------------------+----------------+
| layer 2 | :math:`[2^4, 2^9]` | Log-integer |
+--------------------+----------------------------+----------------+
| step_length | :math:`[10^{-6}, 10^{-1}]` | Log-continuous |
+--------------------+----------------------------+----------------+
| burn_in | :math:`[0, 8]` | Integer |
+--------------------+----------------------------+----------------+
| momentum_decay | :math:`[0, 1]` | Log-continuous |
+--------------------+----------------------------+----------------+
Hyperparameters for CNNs
^^^^^^^^^^^^^^^^^^^^^^^^
Convolutional Neural Networks (CNNs) are the backbone of most modern computer vision systems. They are designed to automatically and adaptively learn spatial hierarchies of features through backpropagation. The hyperparameters include learning rate, momentum, regularization parameter, dropout rate, and activation function.
+--------------------------+----------------------------+-------------+
| **Hyperparameter** | **Range** | **Type** |
+==========================+============================+=============+
| learning_rate | :math:`[10^{-6}, 10^{-1}]` | Continuous |
+--------------------------+----------------------------+-------------+
| momentum | :math:`[0.0, 0.9]` | Continuous |
+--------------------------+----------------------------+-------------+
| regularization_parameter | :math:`[10^{-6}, 10^{-2}]` | Continuous |
+--------------------------+----------------------------+-------------+
| dropout_rate | :math:`[0, 0.5]` | Continuous |
+--------------------------+----------------------------+-------------+
| activation_function | {ReLU, Leaky ReLU, Tanh, | Categorical |
| | Sigmoid} | |
+--------------------------+----------------------------+-------------+
Hyperparameters for ResNet18
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ResNet18 is a residual network architecture that introduced the concept of residual connections, allowing for the training of very deep networks by mitigating the vanishing gradient problem. The hyperparameters include learning rate, momentum, dropout rate, and weight decay.
+--------------------+----------------------------+------------+
| **Hyperparameter** | **Range** | **Type** |
+====================+============================+============+
| learning_rate | :math:`[10^{-5}, 10^{-1}]` | Continuous |
+--------------------+----------------------------+------------+
| momentum | :math:`[0, 1]` | Continuous |
+--------------------+----------------------------+------------+
| dropout_rate | :math:`[0, 0.5]` | Continuous |
+--------------------+----------------------------+------------+
| weight_decay | :math:`[10^{-5}, 10^{-2}]` | Continuous |
+--------------------+----------------------------+------------+
Hyperparameters for DenseNet
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
DenseNet is a densely connected convolutional network that connects each layer to every other layer in a feed-forward fashion. This architecture improves the flow of information and gradients throughout the network, making it easier to train. The hyperparameters include learning rate, momentum, dropout rate, and weight decay.
+--------------------+----------------------------+------------+
| **Hyperparameter** | **Range** | **Type** |
+====================+============================+============+
| learning_rate | :math:`[2^3, 2^8]` | Integer |
+--------------------+----------------------------+------------+
| momentum | :math:`[0, 1]` | Continuous |
+--------------------+----------------------------+------------+
| dropout_rate | :math:`[0, 0.5]` | Continuous |
+--------------------+----------------------------+------------+
| weight_decay | :math:`[10^{-5}, 10^{-1}]` | Continuous |
+--------------------+----------------------------+------------+
Machine Learning Tasks
^^^^^^^^^^^^^^^^^^^^^^
This section lists the various sources of machine learning tasks used for hyperparameter optimization, including classification and regression problems. These datasets are widely recognized in the machine learning community and are used for benchmarking algorithms.
+--------------------------------------------------------+---------------------------+------------+---------+
| **Source** | **Type** | **Number** | **IDs** |
+========================================================+===========================+============+=========+
| `OpenML-CC18 `_ | Classification | 78 | 1-78 |
+--------------------------------------------------------+---------------------------+------------+---------+
| `UC Irvine Repository `_ | Classification/Regression | 10 | 79-88 |
+--------------------------------------------------------+---------------------------+------------+---------+
| `NAS-Bench-360 `_ | Classification/Regression | 5 | 89-93 |
+--------------------------------------------------------+---------------------------+------------+---------+
| `NATS-Bench `_ | Classification | 3 | 94-96 |
+--------------------------------------------------------+---------------------------+------------+---------+
| `SVHN `_ | Classification | 1 | 97 |
+--------------------------------------------------------+---------------------------+------------+---------+
.. _cso-problems:
Configurable Software Optimization Problem
------------------------------------------
This section provides a summary of the configurable software optimization (CSO) tasks, which involve optimizing various software systems. The tasks are characterized by the number of variables, objectives, and workloads, along with the sources of these workloads.
+-------------------+---------------+----------------+---------------+------------------------------------------------------------------------------------------------------------------------------------------+
| **Software Name** | **Variables** | **Objectives** | **Workloads** | **Workloads Source** |
+===================+===============+================+===============+==========================================================================================================================================+
| LLVM | 93 | 8 | 50 | `PolyBench `_, `mibench `_ |
+-------------------+---------------+----------------+---------------+------------------------------------------------------------------------------------------------------------------------------------------+
| GCC | 105 | 8 | 50 | `PolyBench `_, `mibench `_ |
+-------------------+---------------+----------------+---------------+------------------------------------------------------------------------------------------------------------------------------------------+
| MySQL             | 28            | 14             | 18            | `benchbase `_, `sysbench `_ |
+-------------------+---------------+----------------+---------------+------------------------------------------------------------------------------------------------------------------------------------------+
| Hadoop | 206 | 1 | 29 | `HiBench `_ |
+-------------------+---------------+----------------+---------------+------------------------------------------------------------------------------------------------------------------------------------------+
.. _rna-problems:
RNA Inverse Design Problem
---------------------------
RNA inverse design involves designing RNA sequences that fold into specific secondary structures. This task is crucial for understanding and manipulating RNA function in various biological processes. The datasets listed here are commonly used benchmarks for RNA design algorithms.
+---------------------------------------------------------------------+-------------------------+-------------+
| **Source** | **Min-Max Length (nt)** | **Samples** |
+=====================================================================+=========================+=============+
| `Eterna100 `_ | 11-399 | 100 |
+---------------------------------------------------------------------+-------------------------+-------------+
| `Rfam-learn test `_ | 50-446 | 100 |
+---------------------------------------------------------------------+-------------------------+-------------+
| `RNA-Strand `_ | 4-4381 | 50 |
+---------------------------------------------------------------------+-------------------------+-------------+
| `RNAStralign `_ | 30-1851 | 37149 |
+---------------------------------------------------------------------+-------------------------+-------------+
| `ArchiveII `_ | 28-2968 | 2975 |
+---------------------------------------------------------------------+-------------------------+-------------+
.. _pif-problems:
Protein Inverse Folding Problem
--------------------------------
Protein inverse folding involves designing new amino acid sequences that fold into a desired backbone structure. These problems are essential for applications in drug design, biotechnology, and synthetic biology. The datasets listed here are widely used in protein inverse folding research.
+--------------------------------------------------------+-----------------------------+-------------+
| **Source** | **Type** | **Numbers** |
+========================================================+=============================+=============+
| `Absolute `_ | Antibody design | 159 |
+--------------------------------------------------------+-----------------------------+-------------+
| `CATH `_ | Single-chain protein design | 19752 |
+--------------------------------------------------------+-----------------------------+-------------+
| `Protein Data Bank `_ | Multi-chain protein design | 26361 |
+--------------------------------------------------------+-----------------------------+-------------+
.. _parallelization:
Parallelization
---------------
To-do
================================================
FILE: docs/source/usage/results.rst
================================================
Results Analysis
================
.. admonition:: Overview
:class: info
- :ref:`Register a New Results Analysis Method <registering-new-analysis>`: How to add a new results analysis method to :ref:`TransOPT `.
- :ref:`Customization Analysis Pipeline <customization>`: How to customize your own results analysis pipeline or add your own analysis method to the pipeline.
- :ref:`Performance Evaluation Metrics <performance-evaluation-metrics>`: The list of the performance evaluation metrics available in :ref:`TransOPT `.
- :ref:`Statistical Measures <statistical-measures>`: The list of the statistical measures supported in :ref:`TransOPT `.
.. _registering-new-analysis:
Register a New Results Analysis Method
--------------------------------------
.. _customization:
Customization Analysis Pipeline
-------------------------------
.. _performance-evaluation-metrics:
List of Performance Evaluation Metrics
--------------------------------------
For each type of task instance, the framework offers performance evaluation metrics to assess the quality of the solutions generated by the algorithms. The metrics are categorized based on the type of task and are designed to evaluate various aspects of the solutions. The tables below summarize the performance metrics available for different tasks.
+--------------------------+--------------------+--------------------------------------------------------------+-----------+--------------+
| **Task** | **Metric** | **Description** | **Scale** | **Type** |
+==========================+====================+==============================================================+===========+==============+
| **Synthetic**            | Absolute Error     | The difference between the best objective value found and    | [0, ∞]    | Minimization |
|                          |                    | the known optimum.                                           |           |              |
+--------------------------+--------------------+--------------------------------------------------------------+-----------+--------------+
| **HPO (Classification)** | F1 Score           | The harmonic mean of precision and recall, providing a       | [0, 1]    | Maximization |
|                          |                    | balanced measure of accuracy.                                |           |              |
+--------------------------+--------------------+--------------------------------------------------------------+-----------+--------------+
| | Area Under Curve | The area under the receiver operating characteristic | [0, 1] | Maximization |
| | | (ROC) curve, quantifying the overall ability of a classifier | | |
| | | to discriminate between positive and negative instances. | | |
+--------------------------+--------------------+--------------------------------------------------------------+-----------+--------------+
| **HPO (Regression)** | RMSE | Root mean squared error (RMSE) measures the average | [0, ∞] | Minimization |
| | | magnitude of the differences between predicted values and | | |
| | | actual values. | | |
+--------------------------+--------------------+--------------------------------------------------------------+-----------+--------------+
| | MAE | Mean absolute error (MAE) measures the average absolute | [0, ∞] | Minimization |
| | | differences between predicted values and actual values. | | |
+--------------------------+--------------------+--------------------------------------------------------------+-----------+--------------+
| **Protein Design** | Binding Affinity | The strength of the interaction between a protein and its | [-∞, 0] | Minimization |
| | | ligand, typically measured by the equilibrium dissociation | | |
| | | constant. | | |
+--------------------------+--------------------+--------------------------------------------------------------+-----------+--------------+
| **RNA Inverse Design** | GC-content | The percentage of guanine (G) and cytosine (C) bases in a | [0, 1] | Maximization |
| | | DNA or RNA molecule, which affects the stability and | | |
| | | melting temperature. | | |
+--------------------------+--------------------+--------------------------------------------------------------+-----------+--------------+
| **LLVM/GCC** | Avg Execution Time | The average execution time of multiple runs. | [0, ∞] | Minimization |
+--------------------------+--------------------+--------------------------------------------------------------+-----------+--------------+
| | Compilation Time | The time required to compile the code. | [0, ∞] | Minimization |
+--------------------------+--------------------+--------------------------------------------------------------+-----------+--------------+
| | File Size | The size of the executable file generated after compilation. | [0, ∞] | Minimization |
+--------------------------+--------------------+--------------------------------------------------------------+-----------+--------------+
| | Max RSS | The maximum resident set size used during execution. | [0, ∞] | Minimization |
+--------------------------+--------------------+--------------------------------------------------------------+-----------+--------------+
| | PAPI TOT CYC | The total number of CPU cycles consumed during execution. | [0, ∞] | Minimization |
+--------------------------+--------------------+--------------------------------------------------------------+-----------+--------------+
| | PAPI TOT INS | The total number of instructions executed by the CPU. | [0, ∞] | Minimization |
+--------------------------+--------------------+--------------------------------------------------------------+-----------+--------------+
| | PAPI BR MSP | The number of times the CPU mispredicted branch directions. | [0, ∞] | Minimization |
+--------------------------+--------------------+--------------------------------------------------------------+-----------+--------------+
| | PAPI BR PRC | The number of times the CPU correctly predicted branch | [0, ∞] | Minimization |
| | | directions. | | |
+--------------------------+--------------------+--------------------------------------------------------------+-----------+--------------+
| | PAPI BR CN | The number of conditional branch instructions. | [0, ∞] | Minimization |
+--------------------------+--------------------+--------------------------------------------------------------+-----------+--------------+
| | PAPI MEM WCY | The number of cycles spent waiting for memory access. | [0, ∞] | Minimization |
+--------------------------+--------------------+--------------------------------------------------------------+-----------+--------------+
| **MySQL** | Throughput | The number of transactions processed per unit of time. | [0, ∞] | Maximization |
+--------------------------+--------------------+--------------------------------------------------------------+-----------+--------------+
| | Latency | The time required to complete a single transaction from | [0, ∞] | Minimization |
| | | initiation to completion. | | |
+--------------------------+--------------------+--------------------------------------------------------------+-----------+--------------+
| | CPU Usage | The proportion of CPU resources used during database | [0, ∞] | Minimization |
| | | operations. | | |
+--------------------------+--------------------+--------------------------------------------------------------+-----------+--------------+
| | Memory Usage | The amount of memory resources used during database | [0, ∞] | Minimization |
| | | operations. | | |
+--------------------------+--------------------+--------------------------------------------------------------+-----------+--------------+
| **Hadoop** | Execution Time | The execution time of a big data task. | [0, ∞] | Minimization |
+--------------------------+--------------------+--------------------------------------------------------------+-----------+--------------+
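As a concrete example, the RMSE and MAE metrics from the table above reduce to a few lines of Python (the function names here are illustrative, not TransOPT API):

```python
import math

def rmse(y_true, y_pred):
    """Root mean squared error over paired observations."""
    n = len(y_true)
    return math.sqrt(sum((t - p) ** 2 for t, p in zip(y_true, y_pred)) / n)

def mae(y_true, y_pred):
    """Mean absolute error over paired observations."""
    return sum(abs(t - p) for t, p in zip(y_true, y_pred)) / len(y_true)
```

Both are minimized; RMSE penalizes large individual errors more heavily than MAE because the differences are squared before averaging.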
.. _statistical-measures:
Statistical Measures
--------------------
This section provides detailed explanations of the statistical methods used for analyzing the performance of different algorithms. Each method is accompanied by the relevant formulas and calculation procedures.
Wilcoxon Signed-Rank Test
^^^^^^^^^^^^^^^^^^^^^^^^^
The **Wilcoxon signed-rank test** is a non-parametric statistical test used to compare two paired samples. Unlike the paired t-test, the Wilcoxon signed-rank test does not assume that the differences between pairs are normally distributed. It is particularly useful when dealing with small sample sizes or non-normally distributed data.
Given two related samples :math:`X` and :math:`Y`, the steps to perform the Wilcoxon signed-rank test are:
1. **Compute the differences** between each pair of observations: :math:`d_i = X_i - Y_i`.
2. **Rank the absolute values** of the differences, assigning ranks from the smallest to the largest difference.
3. **Assign signs** to the ranks based on the sign of the original differences :math:`d_i`.
4. **Calculate the test statistic** :math:`W`, which is the sum of the ranks corresponding to the positive differences:
.. math::
W = \sum_{d_i > 0} \text{Rank}(d_i)
5. Compare the computed test statistic :math:`W` against the critical value from the Wilcoxon signed-rank table or calculate the p-value to determine the significance of the result.
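The ranking steps above can be sketched in plain Python; the helper name ``wilcoxon_w`` is illustrative, and in practice ``scipy.stats.wilcoxon`` computes both the statistic and the p-value:

```python
def wilcoxon_w(x, y):
    """Sum of ranks of the positive differences (the W statistic).

    Zero differences are dropped; tied |d| values get average ranks.
    """
    d = [xi - yi for xi, yi in zip(x, y) if xi != yi]
    abs_sorted = sorted(abs(v) for v in d)

    def avg_rank(value):
        first = abs_sorted.index(value)           # 0-based first position
        last = first + abs_sorted.count(value) - 1
        return (first + last) / 2 + 1             # average 1-based rank

    return sum(avg_rank(abs(v)) for v in d if v > 0)
```

For example, with paired samples ``[5, 6, 7, 8]`` and ``[4, 7, 6, 5]`` the differences are ``[1, -1, 1, 3]``; the three ties at ``|d| = 1`` each receive rank 2, so the positive ranks sum to 2 + 2 + 4 = 8.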
Scott-Knott Test
^^^^^^^^^^^^^^^^
The **Scott-Knott test** is a statistical method used to rank the performance of different techniques across multiple runs on each benchmark instance. It is particularly effective in scenarios where multiple comparisons are being made, and it controls the family-wise error rate.
The procedure involves:
1. **Partitioning the data**: Initially, all techniques are considered in one group. The group is then split into two subgroups if the mean difference between them is statistically significant.
2. **Calculating the mean difference** between the groups using an appropriate test (e.g., ANOVA or t-test).
3. **Assigning ranks**: If a significant difference is found, the techniques are ranked within their respective subgroups. If no significant difference is found, the techniques are considered to be in the same rank.
4. **Repeating the process** until no further significant splits can be made.
The Scott-Knott test is particularly useful for determining the relative performance of multiple techniques, providing a clear ranking based on statistically significant differences.
A12 Effect Size
^^^^^^^^^^^^^^^
The **A12 effect size** is a non-parametric measure used to evaluate the probability that one algorithm outperforms another. It is particularly useful in understanding whether observed differences are practically significant, beyond just being statistically significant.
The A12 statistic is calculated as follows:
1. Let :math:`A` and :math:`B` be the two sets of performance measures for two algorithms.
2. **Calculate the A12 statistic**:
.. math::
A_{12} = \frac{\sum_{x \in A} \sum_{y \in B} \left( \mathbf{I}(x > y) + 0.5 \cdot \mathbf{I}(x = y) \right)}{|A| \cdot |B|}
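The statistic translates directly into code; the function name ``a12`` below is illustrative:

```python
def a12(a, b):
    """Vargha-Delaney A12: estimated probability that a value drawn
    from A exceeds a value drawn from B, counting ties as one half."""
    wins = sum(1.0 for x in a for y in b if x > y)
    ties = sum(0.5 for x in a for y in b if x == y)
    return (wins + ties) / (len(a) * len(b))
```

A value of 0.5 indicates no difference between the two algorithms; the further the value is from 0.5 in either direction, the larger the effect.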
Critical Difference (CD)
^^^^^^^^^^^^^^^^^^^^^^^^
The **Critical Difference (CD)** is a statistical measure used to assess whether observed performance differences between algorithms can be attributed to random variation. It is typically used in conjunction with the Friedman test and the Nemenyi post-hoc test to evaluate multiple algorithms across multiple datasets.
The steps involved in calculating the Critical Difference are:
1. **Perform a Friedman test** to rank the algorithms for each dataset.
2. **Calculate the average ranks** for each algorithm across all datasets.
3. **Compute the Critical Difference (CD)** using the following formula:
.. math::
\text{CD} = q_{\alpha} \sqrt{\frac{k(k+1)}{6N}}
where:
- :math:`q_{\alpha}` is the critical value for a given significance level :math:`\alpha` from the studentized range statistic.
- :math:`k` is the number of algorithms.
- :math:`N` is the number of datasets.
4. If the difference in average ranks between two algorithms exceeds the CD, the performance difference is considered statistically significant, and not due to random variation.
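The CD formula itself is a one-liner; the example value 2.343 is the tabulated studentized-range value :math:`q_{0.05}` for :math:`k = 3` in the Nemenyi test (look up the appropriate value for your own :math:`k` and :math:`\alpha`):

```python
import math

def critical_difference(k, n, q_alpha):
    """CD = q_alpha * sqrt(k (k + 1) / (6 N)) for k algorithms, N datasets."""
    return q_alpha * math.sqrt(k * (k + 1) / (6.0 * n))

# Example: 3 algorithms compared over 10 datasets at alpha = 0.05.
cd = critical_difference(3, 10, 2.343)
```

Any pair of algorithms whose average ranks differ by more than ``cd`` is declared significantly different.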
These statistical methods provide robust tools for comparing algorithm performance across various benchmarks, ensuring that conclusions drawn are both statistically and practically significant.
================================================
FILE: docs/source/usage/visualization.rst
================================================
Visualization
===============
This section demonstrates various visualization techniques used in TransOPT.
Data Filtering and Statistical Visualization
--------------------------------------------
This section demonstrates how to filter data into multiple groups based on different conditions, perform statistical analysis on these groups, and visualize the results using box plots and trajectory plots.
.. figure:: /_static/figures/visualization/filter.jpeg
:alt: Data Filtering Process
:width: 100%
:align: center
Figure 1: Four groups with different surrogate models.
The above figure illustrates the process of filtering data into multiple groups based on different conditions. This visual representation helps to understand how the data is segmented and analyzed in our visualization approach.
Key steps in the data filtering process:
1. Click + to add a new filter group.
2. Define filter conditions for each group.
3. Apply filters and generate visualizations (e.g., box plots, trajectory plots) for each group.
Visualization of Filtered Data
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
After filtering the data into groups, TransOPT provides two main types of visualizations to compare and analyze the results: trajectory plots and box plots.
Trajectory Plot
"""""""""""""""
The trajectory plot shows the performance of different groups over time or iterations.
.. figure:: /_static/figures/visualization/traj_compare.jpg
:alt: Trajectory Plot of Different Groups
:width: 50%
:align: center
Figure 2: Trajectory plot comparing performance of different surrogate model groups over iterations.
This plot allows you to:
- Compare the convergence rates of different groups
- Identify which group performs better at different stages of the optimization process
- Observe any significant differences in performance trends among the groups
Box Plot
""""""""
The box plot provides a statistical summary of the performance distribution for each group.
.. figure:: /_static/figures/visualization/box_compare.jpg
:alt: Box Plot of Different Groups
:width: 50%
:align: center
Figure 3: Box plot showing performance distribution of different surrogate model groups.
Key insights from the box plot:
- Median performance of each group
- Spread of performance within each group
- Presence of any outliers
- Easy comparison of performance distributions across groups
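The same comparison can be sketched as a matplotlib box plot; the per-group result lists below are hypothetical final objective values from repeated runs:

```python
import matplotlib
matplotlib.use("Agg")  # headless backend
import matplotlib.pyplot as plt

# Hypothetical final objective values per group (one entry per repeated run).
group_results = {
    "GP surrogate": [0.40, 0.42, 0.38, 0.45, 0.60],
    "RF surrogate": [0.50, 0.48, 0.52, 0.55, 0.47],
}

fig, ax = plt.subplots()
# Each box shows the median, interquartile range, whiskers, and outliers.
ax.boxplot(list(group_results.values()))
ax.set_xticklabels(list(group_results.keys()))
ax.set_ylabel("Final objective value")
fig.savefig("box_compare_sketch.png")
```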
Analysis of Individual Datasets
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TransOPT also provides tools for in-depth analysis of individual datasets. This section outlines the process and visualizations available for single dataset analysis.
1. Dataset Selection
""""""""""""""""""""
The first step is to select a specific dataset for analysis. Once selected, TransOPT generates a summary of the dataset's key information.
.. figure:: /_static/figures/visualization/choose.jpg
:alt: Dataset Information Summary
:width: 100%
:align: center
Figure 4: Summary of selected dataset information, including algorithm modules used and optimization problem details.
2. Trajectory Plot
""""""""""""""""""
The trajectory plot for the selected dataset shows the optimization performance over time or iterations.
.. figure:: /_static/figures/visualization/traj_solo.jpg
:alt: Trajectory Plot of Single Dataset
:width: 50%
:align: center
Figure 5: Trajectory plot showing the optimization performance for the selected dataset.
This visualization allows users to:
- Observe the convergence behavior of the optimization process
- Identify any plateaus or sudden improvements in performance
- Assess the overall efficiency of the optimization algorithm for this specific dataset
3. Variable Importance
""""""""""""""""""""""
The variable importance plot highlights which features or parameters had the most significant impact on the optimization outcome.
.. figure:: /_static/figures/visualization/importance.jpg
:alt: Variable Importance Plot
:width: 50%
:align: center
Figure 6: Variable importance plot showing the relative impact of different features or parameters.
This visualization helps users:
- Identify the most influential variables in the optimization process
- Understand which parameters might require more careful tuning
- Gain insights into the underlying structure of the optimization problem
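A simple importance score of this kind can be sketched with a per-variable linear regression, similar in spirit to the repository's `data_analysis.py`: for a single regressor, the F-statistic equals the squared t-statistic of the slope. The data below is synthetic, constructed so that `lr` dominates:

```python
import numpy as np
from scipy import stats

rng = np.random.default_rng(0)

# Synthetic tuning data: 'lr' strongly drives accuracy, 'momentum' barely does.
n = 200
lr = rng.uniform(-4, -1, n)            # log10 learning rate
momentum = rng.uniform(0.0, 1.0, n)
accuracy = 0.6 + 0.05 * lr + 0.001 * momentum + rng.normal(0, 0.01, n)

importance = {}
for name, x in {"lr": lr, "momentum": momentum}.items():
    # For one regressor, F = t^2 where t is the slope's t-statistic.
    res = stats.linregress(x, accuracy)
    importance[name] = (res.slope / res.stderr) ** 2

print(importance)  # a larger F-value indicates a more influential variable
```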
4. Dimensionality Reduction Plot
""""""""""""""""""""""""""""""""
The dimensionality reduction plot provides a 2D representation of the high-dimensional sampling data, typically using techniques like PCA or t-SNE.
.. figure:: /_static/figures/visualization/footprint.jpg
:alt: Dimensionality Reduction Plot
:width: 50%
:align: center
Figure 7: 2D plot of the sampled data after dimensionality reduction.
This visualization allows users to:
- Observe clusters or patterns in the sampling data
- Identify regions of the search space that were more heavily explored
- Gain intuition about the structure of the optimization landscape
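A minimal PCA footprint of this kind can be sketched with scikit-learn; the 5-D samples below are synthetic, constructed so that two latent directions dominate the variation:

```python
import numpy as np
from sklearn.decomposition import PCA

rng = np.random.default_rng(0)

# Synthetic sampled configurations: 100 points in a 5-D search space
# whose variation is dominated by two latent directions.
base = rng.normal(size=(100, 2))
mixing = rng.normal(size=(2, 5))
X = base @ mixing + 0.05 * rng.normal(size=(100, 5))

# Project to 2-D for a footprint-style view of the explored region.
pca = PCA(n_components=2)
X_2d = pca.fit_transform(X)

print(X_2d.shape)  # (100, 2)
```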
================================================
FILE: extra_requirements/analysis.json
================================================
{
"analysis": ["pandas", "tikzplotlib", "pdf2image", "seaborn", "Pillow"]
}
================================================
FILE: extra_requirements/remote.json
================================================
{
"remote": ["flask", "requests", "celery"]
}
================================================
FILE: requirements.txt
================================================
scipy>=1.4.1
numpy>=1.18.1
ConfigSpace>=0.4.12
scikit-learn
openml
matplotlib
torch
torchvision
gpytorch
GPyOpt
gym
sobol-seq
xgboost
paramz
emukit
pymoo
jax
gplearn
oslo.concurrency>=4.2.0
git+https://github.com/SheffieldML/GPy.git@devel
mmh3
rich
tqdm
wilds
pyro-ppl
bohb-hpo
HEBO
git+https://github.com/RobustBench/robustbench.git
hyperopt
flask_cors
openai
# Analysis
pandas
tikzplotlib
pdf2image
seaborn
Pillow
networkx
# Remote
flask
requests
celery
================================================
FILE: resources/docker/absolut_image/Dockerfile
================================================
FROM ubuntu:latest
RUN apt-get update && \
apt-get install -y git wget unzip build-essential
ENV INSTALL_DIR=/usr/local/Absolut
ENV TEMP_DIR=/root/Absolut_temp
ENV REPO_URL=https://github.com/csi-greifflab/Absolut
RUN mkdir -p $INSTALL_DIR && mkdir -p $TEMP_DIR
RUN git clone $REPO_URL $TEMP_DIR && \
cd $TEMP_DIR/src && \
sed -i 's/-Wl//g' Makefile && \
make && \
mv AbsolutNoLib /usr/local/bin/AbsolutNoLib
RUN rm -rf $TEMP_DIR
COPY prepare_antigen.sh /usr/local/bin/prepare_antigen.sh
RUN chmod +x /usr/local/bin/prepare_antigen.sh
WORKDIR $INSTALL_DIR
================================================
FILE: resources/docker/absolut_image/prepare_antigen.sh
================================================
#!/bin/bash
# Check that an antigen argument was provided
if [ -z "$1" ]; then
echo "Usage: $0 <antigen>"
exit 1
fi
ANTIGEN=$1
INSTALL_DIR=/usr/local/Absolut
# Ensure the working directory exists
mkdir -p $INSTALL_DIR
cd $INSTALL_DIR
# Get the pre-calculated structure filename and download URL
info_output=$(AbsolutNoLib info_filenames $ANTIGEN)
filename=$(echo "$info_output" | grep -oP '(?<=Pre-calculated structures are in )[^\s]+')
url=$(echo "$info_output" | grep -oP '(?<=curl -O -J )[^\s]+')
# Check whether the file already exists
if [ -f "$INSTALL_DIR/$filename" ]; then
echo "File $filename already exists. Skipping download."
else
if [ -n "$url" ]; then
echo "Downloading from URL: $url"
download_filename=$(basename $url)
wget $url -O $INSTALL_DIR/$download_filename
if [ $? -eq 0 ]; then
unzip -o $INSTALL_DIR/$download_filename -d $INSTALL_DIR
rm $INSTALL_DIR/$download_filename
else
echo "Download failed for $ANTIGEN"
exit 1
fi
else
echo "No URL found for antigen: $ANTIGEN"
exit 1
fi
fi
================================================
FILE: scripts/init_csstuning.sh
================================================
#!/bin/bash
pip install transopt_external/csstuning
bash transopt_external/csstuning/cssbench/compiler/docker/build_docker.sh
bash transopt_external/csstuning/cssbench/dbms/docker/build_docker.sh
csstuning_dbms_init -h
================================================
FILE: scripts/init_docker.sh
================================================
#!/bin/bash
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
DOCKER_ROOT_DIR="$SCRIPT_DIR/../resources/docker"
remove_old_images() {
local image_name=$1
old_image_ids=$(docker images -q --filter "reference=$image_name" | tail -n +2)
if [ -n "$old_image_ids" ]; then
echo "Removing old Docker image(s) with name '$image_name'..."
docker rmi -f $old_image_ids
fi
dangling_image_ids=$(docker images -f "dangling=true" -q)
if [ -n "$dangling_image_ids" ]; then
echo "Removing dangling images..."
docker rmi -f $dangling_image_ids
fi
}
build_docker_image() {
local image_name=$1
local docker_dir=$2
if [ -f "$docker_dir/Dockerfile" ]; then
echo "Building Docker image '$image_name'..."
docker build -t "$image_name" "$docker_dir"
echo "Docker image '$image_name' created successfully."
remove_old_images "$image_name"
else
echo "Dockerfile not found in $docker_dir"
exit 1
fi
}
# Build absolut_image
build_docker_image "absolut_image" "$DOCKER_ROOT_DIR/absolut_image"
================================================
FILE: setup.py
================================================
import os
import json
from setuptools import setup, find_packages
import subprocess
def get_extra_requirements(folder='./extra_requirements'):
""" Helper function to read in all extra requirement files in the specified
folder. """
extra_requirements = {}
if not os.path.exists(folder):
print(f"Folder {folder} does not exist.")
return extra_requirements
for file in os.listdir(folder):
if file.endswith('.json'):
with open(os.path.join(folder, file), 'r', encoding='utf-8') as fh:
requirements = json.load(fh)
extra_requirements.update(requirements)
print(f"Extra requirements: {extra_requirements}")
return extra_requirements
extra_requirements = get_extra_requirements()
def build_docker_image(image_name, docker_dir):
dockerfile_path = os.path.join(docker_dir, 'Dockerfile')
if os.path.exists(dockerfile_path):
print(f"Building Docker image {image_name}...")
subprocess.run(['docker', 'build', '-t', image_name, docker_dir], check=True)
print(f"Docker image '{image_name}' created successfully.")
else:
print(f"Dockerfile not found at {dockerfile_path}")
raise FileNotFoundError(f"Dockerfile not found at {dockerfile_path}")
def init_absolut_docker():
docker_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'resources/docker/absolut_image')
build_docker_image('absolut_image', docker_dir)
req = [
"scipy>=1.4.1",
"numpy>=1.18.1",
"ConfigSpace>=0.4.12",
"scikit-learn",
"openml",
"matplotlib",
"torch",
"torchvision",
"gpytorch",
# "GPy",
"GPyOpt",
"gym",
"sobol-seq",
"xgboost",
"paramz",
"emukit",
"pymoo",
"jax",
"networkx",
"gplearn",
"oslo.concurrency>=4.2.0",
'GPy @ git+https://github.com/SheffieldML/GPy.git@devel',
'mmh3',
'rich',
'flask_cors',
'openai'
]
setup(
name="transopt",
version="0.0.1",
author="transopt",
description="Transfer Optimization System",
long_description="TransOPT is an open-source platform for the design, benchmarking, and application of transfer learning for Bayesian optimization (TLBO) algorithms.",
url="https://github.com/maopl/TransOpt.git",
classifiers=[
"Development Status :: 3 - Alpha",
"Intended Audience :: Developers",
"License :: OSI Approved :: MIT License",
"Programming Language :: Python :: 3",
],
license="BSD",
packages=find_packages(exclude=["hpobench"]),
install_requires=req,
extras_require=extra_requirements,
entry_points={
'console_scripts': [
'transopt-server = transopt.agent.app:main',
'init-absolut-docker = transopt.scripts.init_docker:init_absolut_docker',
],
}
)
================================================
FILE: tests/EXP_NSGA2.py
================================================
import numpy as np
from pymoo.algorithms.moo.nsga2 import NSGA2
from pymoo.optimize import minimize
from pymoo.core.problem import Problem
from transopt.benchmark.HPO.HPO import HPO_ERM
class HPOProblem(Problem):
def __init__(self, task_name, budget_type, budget, seed, workload):
self.hpo = HPO_ERM(task_name=task_name, budget_type=budget_type, budget=budget, seed=seed, workload=workload, algorithm='ERM', gpu_id=0, augment='cutout', architecture='resnet', model_size=18, optimizer='nsga2_augment_cutout', base_dir='/data/')
original_ranges = self.hpo.configuration_space.original_ranges
n_var = len(original_ranges)
xl = np.array([original_ranges[key][0] for key in original_ranges])
xu = np.array([original_ranges[key][1] for key in original_ranges])
super().__init__(n_var=n_var, n_obj=2, n_constr=0, xl=xl, xu=xu)
def _evaluate(self, X, out, *args, **kwargs):
f1 = []
f2 = []
for x in X:
config = {}
for i, param_name in enumerate(self.hpo.configuration_space.original_ranges):
if param_name == 'epoch':
config[param_name] = int(x[i])
else:
config[param_name] = x[i]
val_acc = self.hpo.objective_function(config)
f1.append(1 - val_acc['test_standard_acc'])  # Minimize 1 - standard accuracy
f2.append(1 - val_acc['test_robust_acc'])  # Minimize 1 - robust accuracy
out["F"] = np.column_stack([f1, f2])
if __name__ == "__main__":
problem = HPOProblem(task_name='test_task', budget_type='FEs', budget=3000, seed=0, workload=0)
algorithm = NSGA2(pop_size=40)
res = minimize(problem, algorithm, ('n_gen', 50), seed=1, verbose=True)
print("Best solutions found:")
for i in range(len(res.X)):
print(f"Solution {i+1}: {res.X[i]}, Objectives: {res.F[i]}")
================================================
FILE: tests/EXP_NSGA2_restart.py
================================================
import numpy as np
from pymoo.algorithms.moo.nsga2 import NSGA2
from pymoo.optimize import minimize
from pymoo.core.problem import Problem
from transopt.benchmark.HPO.HPO import HPO_ERM
import os
import pandas as pd
from pymoo.util.nds.non_dominated_sorting import NonDominatedSorting
import matplotlib.pyplot as plt  # added for plotting
from pymoo.core.population import Population
class HPOProblem(Problem):
def __init__(self, task_name, budget_type, budget, seed, workload, data_file):
self.hpo = HPO_ERM(task_name=task_name, budget_type=budget_type, budget=budget, seed=seed, workload=workload, algorithm='ERM',architecture='resnet', model_size=18, optimizer='nsga2_augment_true')
original_ranges = self.hpo.configuration_space.original_ranges
n_var = len(original_ranges)
xl = np.array([original_ranges[key][0] for key in original_ranges])
xu = np.array([original_ranges[key][1] for key in original_ranges])
super().__init__(n_var=n_var, n_obj=2, n_constr=0, xl=xl, xu=xu)
# Load data from specified file
self.data = {}
for filename in os.listdir(data_file):
file_path = os.path.join(data_file, filename)
if os.path.isfile(file_path):
import json
import re
with open(file_path, 'r') as f:
content = json.load(f)
# Extract decision variables from filename
x = []
for key in original_ranges.keys():
pattern = rf'({key}_)([\d.e-]+)'
match = re.search(pattern, filename)
if match:
value = float(match.group(2))
if key in ['lr', 'weight_decay']:
value = np.log10(value)
x.append(value)
elif key == 'epoch':
# Special handling for 'epoch' which is an integer
epoch_match = re.search(r'epoch_(\d+)', filename)
if epoch_match:
x.append(int(epoch_match.group(1)))
elif key in ['data_augmentation', 'class_balanced', 'nonlinear_classifier']:
# Special handling for boolean values
bool_match = re.search(rf'{key}_(True|False)', filename)
if bool_match:
x.append(bool_match.group(1) == 'True')
self.data[filename] = {
'x': x,
'test_standard_acc': content['test_standard_acc'],
'test_robust_acc': np.mean([v for k, v in content.items() if k.startswith('test_') and k != 'test_standard_acc'])
}
def _evaluate(self, X, out, *args, **kwargs):
f1 = []
f2 = []
for x in X:
config = {}
for i, param_name in enumerate(self.hpo.configuration_space.original_ranges):
if param_name == 'epoch':
config[param_name] = int(x[i])
else:
config[param_name] = x[i]
val_acc = self.hpo.objective_function(config)
f1.append(1 - val_acc['test_standard_acc'])  # Minimize 1 - standard accuracy
f2.append(1 - np.mean([v for k, v in val_acc.items() if k.startswith('test_') and k != 'test_standard_acc']))  # Minimize 1 - mean robust accuracy
out["F"] = np.column_stack([f1, f2])
if __name__ == "__main__":
data_file = '/home/haxx/transopt_tmp/output/results/nsga2_false_augment_ERM_resnet_18_RobCifar10_0'
problem = HPOProblem(task_name='test_task', budget_type='FEs', budget=3000, seed=0, workload=0,
data_file=data_file)
# Extract objectives from the loaded data
F = np.array([[1 - data['test_standard_acc'], 1 - data['test_robust_acc']] for data in problem.data.values()])
# Perform non-dominated sorting
nds = NonDominatedSorting()
fronts = nds.do(F)
# Initialize lists for the initial population
initial_X = []
initial_F = []
pop_size = 40 # Assuming a population size of 40, adjust as needed
# Iterate through fronts and add solutions layer by layer
for front in fronts:
front_solutions = [list(problem.data.values())[i] for i in front]
if len(initial_X) + len(front_solutions) <= pop_size:
initial_X.extend([sol['x'] for sol in front_solutions])
initial_F.extend([[1 - sol['test_standard_acc'], 1 - sol['test_robust_acc']] for sol in front_solutions])
else:
remaining_slots = pop_size - len(initial_X)
if remaining_slots > 0:
# Use niching to select the most diverse solutions from the current front
front_x = np.array([sol['x'] for sol in front_solutions])
from scipy.spatial.distance import cdist
distances = cdist(front_x, front_x)
selected_indices = []
while len(selected_indices) < remaining_slots:
if len(selected_indices) == 0:
selected_indices.append(np.random.choice(len(front_x)))
else:
min_distances = np.min(distances[:, selected_indices], axis=1)
min_distances[selected_indices] = -np.inf
selected_indices.append(np.argmax(min_distances))
initial_X.extend([front_solutions[i]['x'] for i in selected_indices])
initial_F.extend([[1 - front_solutions[i]['test_standard_acc'], 1 - front_solutions[i]['test_robust_acc']] for i in selected_indices])
break
# Create the initial population with X, F, and set evaluated correctly
initial_pop = Population.new(X=np.array(initial_X), F=np.array(initial_F))
for ind in initial_pop:
ind.evaluated = {"F", "CV"} # Set evaluated to include both F and CV
# Create the NSGA-II algorithm
algorithm = NSGA2(pop_size=len(initial_pop))
# Set the total number of evaluations
total_evaluations = 2000
current_evaluations = len(problem.data)
remaining_evaluations = total_evaluations - current_evaluations
remaining_generations = max(1, remaining_evaluations // pop_size)
# Continue the optimization
res = minimize(problem, algorithm, ('n_gen', remaining_generations), seed=1, verbose=True)
print("Best solutions found:")
for i in range(len(res.X)):
print(f"Solution {i+1}: {res.X[i]}, Objectives: {res.F[i]}")
================================================
FILE: tests/EXP_bohb.py
================================================
from bohb import BOHB
import bohb.configspace as cs
from transopt.benchmark.HPO.HPO import HPO_ERM
import numpy as np
# Create a single HPO_ERM instance
hpo = HPO_ERM(task_name='bohb_optimization', budget_type='FEs', budget=2000, seed=42, workload=0,algorithm='ERM',architecture='resnet', model_size=18, optimizer='bohb')
# Define the objective function
def objective(config, budget):
result = hpo.objective_function(configuration=config, fidelity={'epoch': int(budget)})
return 1 - result['function_value'] # BOHB minimizes, so return 1 - function_value as the loss
# Define the configuration space
def get_configspace():
original_ranges = hpo.configuration_space.original_ranges
hyperparameters = [cs.UniformHyperparameter(param_name, lower=param_range[0], upper=param_range[1]) for param_name, param_range in original_ranges.items() ]
space = cs.ConfigurationSpace(hyperparameters)
return space
if __name__ == "__main__":
# Create the configuration space
config_space = get_configspace()
# Initialize BOHB
bohb = BOHB(configspace=config_space,
eta=3, min_budget=1, max_budget=50, n_samples=200,
evaluate=objective)
# Run optimization
results = bohb.optimize()
================================================
FILE: tests/EXP_grid.py
================================================
import numpy as np
from transopt.benchmark.HPO.HPO import HPO_ERM
from scipy.stats import qmc
def sobol_search(n_samples, task_name, budget_type, budget, seed, workload):
hpo = HPO_ERM(task_name=task_name, budget_type=budget_type, budget=budget, seed=seed, workload=workload, optimizer='sobol')
original_ranges = hpo.configuration_space.original_ranges
n_var = len(original_ranges)
xl = np.array([original_ranges[key][0] for key in original_ranges])
xu = np.array([original_ranges[key][1] for key in original_ranges])
# Create a Sobol sequence sampler
sampler = qmc.Sobol(d=n_var, scramble=True, seed=seed)
# Generate Sobol sequence samples
sample = sampler.random(n=n_samples)
# Scale the samples from [0, 1] to the actual parameter ranges
scaled_sample = qmc.scale(sample, xl, xu)
best_val_acc = 0
best_config = None
for i in range(n_samples):
config = {}
for j, param_name in enumerate(original_ranges.keys()):
config[param_name] = scaled_sample[i, j]
# Run the objective function
result = hpo.objective_function(configuration=config)
val_acc = 1 - result['function_value'] # the objective function minimizes 1 - accuracy
print(f"Trial {i + 1}/{n_samples}")
print(f"Configuration: {config}")
print(f"Validation Accuracy: {val_acc}")
if val_acc > best_val_acc:
best_val_acc = val_acc
best_config = config
print(f"Best Validation Accuracy so far: {best_val_acc}")
print("--------------------")
print("\nSobol Search Completed")
print(f"Best Configuration: {best_config}")
print(f"Best Validation Accuracy: {best_val_acc}")
if __name__ == "__main__":
# Set the random seed for reproducibility
np.random.seed(0)
# Run the Sobol sequence search
sobol_search(
n_samples=5000, # number of samples to draw
task_name='sobol_search_hpo',
budget_type='FEs',
budget=5000,
seed=0,
workload=0 # corresponds to the RobCifar10 dataset
)
================================================
FILE: tests/EXP_hebo.py
================================================
import numpy as np
from hebo.design_space.design_space import DesignSpace
from hebo.optimizers.hebo import HEBO
from transopt.benchmark.HPO.HPO import HPO_ERM
# Create a single HPO_ERM instance
hpo = HPO_ERM(task_name='hebo_optimization', budget_type='FEs', budget=2000, seed=42, workload=0, algorithm='ERM',architecture='resnet', model_size=18, optimizer='hebo')
# Define the objective function
def objective(config):
result = hpo.objective_function(configuration=config)
return 1 - result['function_value']
# Define the design space
def get_design_space():
original_ranges = hpo.configuration_space.original_ranges
space = DesignSpace().parse([
{'name': param_name, 'type': 'num', 'lb': param_range[0], 'ub': param_range[1]}
for param_name, param_range in original_ranges.items()
])
return space
if __name__ == "__main__":
# Create the design space
design_space = get_design_space()
# Initialize HEBO
opt = HEBO(design_space, scramble_seed=0)
# Run optimization
n_iterations = 200
for i in range(n_iterations):
rec = opt.suggest(n_suggestions=1)
f_val = objective(rec.to_dict(orient='records')[0])
y = np.array([[f_val]])
opt.observe(rec, y)
print(f'After {i+1} iterations, best obj is {opt.y.min():.4f}')
================================================
FILE: tests/EXP_hyperopt.py
================================================
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from transopt.benchmark.HPO.HPO import HPO_ERM
import numpy as np
# Create a single HPO_ERM instance
hpo = HPO_ERM(task_name='hyperopt_optimization', budget_type='FEs', budget=2000, seed=0, workload=0,algorithm='ERM',architecture='resnet', model_size=18, optimizer='hyperopt')
# Define the objective function
def objective(params):
# Convert hyperopt params to the format expected by HPO_ERM
config = {k: v[0] if isinstance(v, list) else v for k, v in params.items()}
result = hpo.objective_function(configuration=config, fidelity={'epoch': 50})
return {'loss': 1 - result['function_value'], 'status': STATUS_OK}
# Define the search space
def get_hyperopt_space():
original_ranges = hpo.configuration_space.original_ranges
space = {}
for param_name, param_range in original_ranges.items():
space[param_name] = hp.uniform(param_name, param_range[0], param_range[1])
return space
if __name__ == "__main__":
# Create the search space
search_space = get_hyperopt_space()
# Run optimization
n_iterations = 200
trials = Trials()
# Set a random seed for reproducibility
random_seed = 42
np.random.seed(random_seed)
best = fmin(fn=objective,
space=search_space,
algo=tpe.suggest,
max_evals=n_iterations,
trials=trials,
rstate=np.random.default_rng(random_seed))
# Print results
print("Best hyperparameters found:", best)
print("Best objective value:", 1 - min(trials.losses()))
================================================
FILE: tests/EXP_random.py
================================================
import numpy as np
from transopt.benchmark.HPO.HPO import HPO_ERM
import random
def random_search(n_trials, task_name, budget_type, budget, seed, workload):
hpo = HPO_ERM(task_name=task_name, budget_type=budget_type, budget=budget, seed=seed, workload=workload, optimizer='random')
original_ranges = hpo.configuration_space.original_ranges
n_var = len(original_ranges)
xl = np.array([original_ranges[key][0] for key in original_ranges])
xu = np.array([original_ranges[key][1] for key in original_ranges])
# Store configurations that have already been tried
tried_configs = set()
best_val_acc = 0
best_config = None
for trial in range(n_trials):
# Generate new configurations until an untried one is found
while True:
config = {}
for i, name in enumerate(original_ranges.keys()):
config[name] = np.random.uniform(xl[i], xu[i])
# Convert the configuration to an immutable type (a tuple) so it can be added to the set
config_tuple = tuple(sorted(config.items()))
if config_tuple not in tried_configs:
tried_configs.add(config_tuple)
break
# Use a fixed fidelity value
# Run the objective function
result = hpo.objective_function(configuration=config)
val_acc = 1 - result['function_value'] # the objective function minimizes 1 - accuracy
print(f"Trial {trial + 1}/{n_trials}")
print(f"Configuration: {config}")
print(f"Validation Accuracy: {val_acc}")
if val_acc > best_val_acc:
best_val_acc = val_acc
best_config = config
print(f"Best Validation Accuracy so far: {best_val_acc}")
print("--------------------")
print("\nRandom Search Completed")
print(f"Best Configuration: {best_config}")
print(f"Best Validation Accuracy: {best_val_acc}")
if __name__ == "__main__":
# Set random seeds for reproducibility
np.random.seed(0)
random.seed(0)
# Run the random search
random_search(
n_trials=5000, # number of random search trials
task_name='random_search_hpo',
budget_type='FEs',
budget=5000,
seed=0,
workload=0 # corresponds to the RobCifar10 dataset
)
================================================
FILE: tests/EXP_smac.py
================================================
from ConfigSpace import ConfigurationSpace
import ConfigSpace as cs
import numpy as np
import time
from smac import HyperparameterOptimizationFacade, Scenario
from transopt.benchmark.HPO.HPO import HPO_ERM
# Create a single HPO_ERM instance
hpo = HPO_ERM(task_name='smac_optimization', budget_type='FEs', budget=2000, seed=42, workload=0,algorithm='ERM',architecture='resnet', model_size=18, optimizer='smac')
# Define the objective function
def objective(configuration, seed: int = 0):
start = time.time()
result = hpo.objective_function(configuration=configuration.get_dictionary())
end = time.time()
return 1 - result['function_value'] # SMAC minimizes, so we return 1 - accuracy
# Define the configuration space
def get_configspace():
space = ConfigurationSpace()
original_ranges = hpo.configuration_space.original_ranges
for param_name, param_range in original_ranges.items():
space.add_hyperparameter(cs.UniformFloatHyperparameter(param_name, lower=param_range[0], upper=param_range[1]))
return space
if __name__ == "__main__":
# Create the configuration space
config_space = get_configspace()
# Scenario object specifying the optimization environment
scenario = Scenario(config_space, deterministic=True, n_trials=200)
# Use SMAC to find the best configuration/hyperparameters
smac = HyperparameterOptimizationFacade(scenario, objective)
incumbent = smac.optimize()
# Print the best configuration and its performance
print(f"Best configuration: {incumbent}")
print(f"Best performance: {1 - smac.intensifier.trajectory[-1].cost}") # Convert back to accuracy
================================================
FILE: tests/EXP_tpe.py
================================================
import ConfigSpace as cs
import time
import numpy as np
from typing import Any, Dict, List, Optional, Protocol, Tuple
from tpe.optimizer import TPEOptimizer
from transopt.benchmark.HPO.HPO import HPO_ERM
from tpe.optimizer.base_optimizer import BaseOptimizer, ObjectiveFunc
# Create a single HPO_ERM instance
hpo = HPO_ERM(task_name='tpe_optimization', budget_type='FEs', budget=100, seed=42, workload=0, optimizer='tpe')
class formal_obj(ObjectiveFunc):
def __init__(self, f):
self.f = f
def __call__(self, eval_config: Dict[str, Any]) -> Tuple[Dict[str, float], float]:
start = time.time()
results = self.f(eval_config)
return {'loss': 1 - results['function_value']}, time.time() - start
# Create an instance of formal_obj with hpo.objective_function
# Define the configuration space
def get_configspace():
original_ranges = hpo.configuration_space.original_ranges
hyperparameters = [cs.UniformFloatHyperparameter(param_name, lower=param_range[0], upper=param_range[1]) for param_name, param_range in original_ranges.items() ]
space = cs.ConfigurationSpace(hyperparameters)
return space
if __name__ == "__main__":
# Create the configuration space
config_space = get_configspace()
obj_f = formal_obj(hpo.objective_function)
# Initialize TPE Optimizer
opt = TPEOptimizer(obj_func=obj_f, config_space=config_space, n_init=10, max_evals=100, resultfile='tpe_results.json')
# Run optimization
best_config, best_value = opt.optimize()
================================================
FILE: tests/data_analysis.py
================================================
import os
import json
import re
import numpy as np
import matplotlib.pyplot as plt
from pymoo.util.nds.non_dominated_sorting import NonDominatedSorting
from scipy import stats
from statsmodels.formula.api import ols
from sklearn.decomposition import PCA
from mpl_toolkits.mplot3d import Axes3D
def load_data(data_folder):
data = {}
for filename in os.listdir(data_folder):
file_path = os.path.join(data_folder, filename)
if os.path.isfile(file_path):
with open(file_path, 'r') as f:
content = json.load(f)
x = []
for key in ['lr', 'weight_decay', 'momentum', 'dropout_rate']:
pattern = rf'({key}_)([\d.e-]+)'
match = re.search(pattern, filename)
if match:
value = float(match.group(2))
if key in ['lr', 'weight_decay']:
x.append(np.log10(value))
else:
x.append(value)
data[filename] = {
'x': x,
'test_standard_acc': content['test_standard_acc'],
'test_robust_acc': np.mean([v for k, v in content.items() if k.startswith('test_') and k != 'test_standard_acc'])
}
return data
def get_non_dominated_solutions(data):
F = np.array([[1 - d['test_standard_acc'], 1 - d['test_robust_acc']] for d in data.values()])
nds = NonDominatedSorting()
fronts = nds.do(F)
non_dominated = fronts[0]
return F[non_dominated]
def plot_non_dominated_solutions(ax, solutions, label, color):
ax.scatter(solutions[:, 0], solutions[:, 1], label=label, color=color)
# ax.plot(solutions[:, 0], solutions[:, 1], color=color)
# Main comparison function
def compare_nsga2_results_all(res):
fig, ax = plt.subplots(figsize=(10, 8))
# Define color mapping
colors = plt.cm.rainbow(np.linspace(0, 1, len(res)))
for (k, v), color in zip(res.items(), colors):
data = load_data(v)
# Get all data points
all_points = np.array([[1 - d['test_standard_acc'], 1 - d['test_robust_acc']] for d in data.values()])
# Get non-dominated solutions
non_dominated_data = get_non_dominated_solutions(data)
# Plot all points with transparency
ax.scatter(all_points[:, 0], all_points[:, 1], label=f'{k} (all)', color=color, alpha=0.3)
# Plot non-dominated solutions without transparency
ax.scatter(non_dominated_data[:, 0], non_dominated_data[:, 1], label=f'{k} (non-dominated)', color=color, edgecolors='black')
# Set chart properties
ax.set_xlabel('Test Standard Accuracy')
ax.set_ylabel('Test Robust Accuracy')
ax.set_title('Comparison of Solutions for Different Sizes')
ax.legend()
ax.grid(True)
# Invert x and y axes to show accuracy instead of error
ax.invert_xaxis()
ax.invert_yaxis()
# Save the chart
plt.savefig('compare_all.png')
plt.close(fig)
def compare_nsga2_results(res):
# Load the data from each results folder
all_res = {}
fig, ax = plt.subplots(figsize=(10, 8))
# Define the color mapping
colors = plt.cm.rainbow(np.linspace(0, 1, len(res)))
for (k, v), color in zip(res.items(), colors):
data = load_data(v)
# Get the non-dominated solutions
non_dominated_data = get_non_dominated_solutions(data)
# Convert errors back to accuracies (1 - value)
non_dominated_data = 1 - non_dominated_data
# Plot the non-dominated solutions, one color per group
plot_non_dominated_solutions(ax, non_dominated_data, f'{k}', color)
# Set chart properties
ax.set_xlabel('Test Standard Accuracy')
ax.set_ylabel('Test Robust Accuracy')
ax.set_title('Comparison of Non-Dominated Solutions for Different Sizes')
ax.legend()
ax.grid(True)
# Save the chart
plt.savefig('compare.png')
def calculate_variable_importance(res):
# Aggregate data across all scenarios
aggregated_data = {}
for k, v in res.items():
data = load_data(v)
for key, value in data.items():
if key not in aggregated_data:
aggregated_data[key] = value
# Define the variable names
variable_names = ['lr', 'weight_decay', 'momentum', 'dropout_rate']
importance = {'test_standard_acc': {}, 'test_robust_acc': {}}
for i, var_name in enumerate(variable_names):
X = np.array([entry['x'][i] for entry in aggregated_data.values()])
y_standard = np.array([entry['test_standard_acc'] for entry in aggregated_data.values()])
y_robust = np.array([entry['test_robust_acc'] for entry in aggregated_data.values()])
# Use a linear regression model to assess variable importance
model_standard = ols(f'y ~ x', data={'x': X, 'y': y_standard}).fit()
model_robust = ols(f'y ~ x', data={'x': X, 'y': y_robust}).fit()
# Use the F-value and p-value as importance metrics
importance['test_standard_acc'][var_name] = {
'f_value': model_standard.fvalue,
'p_value': model_standard.f_pvalue
}
importance['test_robust_acc'][var_name] = {
'f_value': model_robust.fvalue,
'p_value': model_robust.f_pvalue
}
return importance
def plot_variable_importance(importance):
vars = list(importance['test_standard_acc'].keys())
f_values_standard = [stats['f_value'] for stats in importance['test_standard_acc'].values()]
f_values_robust = [stats['f_value'] for stats in importance['test_robust_acc'].values()]
x = np.arange(len(vars))
width = 0.35
fig, ax = plt.subplots(figsize=(12, 6))
rects1 = ax.bar(x - width/2, f_values_standard, width, label='Test Standard Accuracy')
rects2 = ax.bar(x + width/2, f_values_robust, width, label='Test Robust Accuracy')
ax.set_ylabel('F-value')
ax.set_title('Variable Importance Comparison')
ax.set_xticks(x)
ax.set_xticklabels(vars, rotation=45)
ax.legend()
plt.tight_layout()
plt.savefig('variable_importance_comparison.png')
plt.close()
def visualize_data_with_metrics(data, metric_name, output_file):
"""
Visualize the input data with corresponding metrics.
:param data: A dictionary where keys are sample names and values are dictionaries
containing 'x' (input variables) and metric values.
:param metric_name: The name of the metric to visualize.
:param output_file: The name of the file to save the plot.
"""
X = np.array([sample['x'] for sample in data.values()])
y = np.array([sample[metric_name] for sample in data.values()])
# Check the dimensionality of X
n_dims = X.shape[1]
if n_dims <= 2:
# If X has 2 or fewer dimensions, plot directly
fig = plt.figure(figsize=(10, 8))
if n_dims == 1:
plt.scatter(X, y, c=y, cmap='viridis')
plt.xlabel('Input Variable')
else: # n_dims == 2
plt.scatter(X[:, 0], X[:, 1], c=y, cmap='viridis')
plt.xlabel('First Input Variable')
plt.ylabel('Second Input Variable')
else:
# If X has more than 2 dimensions, use PCA for dimensionality reduction
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)
fig = plt.figure(figsize=(10, 8))
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y, cmap='viridis')
plt.xlabel('First Principal Component')
plt.ylabel('Second Principal Component')
plt.colorbar(label=metric_name)
plt.title(f'{metric_name} vs Input Variables')
plt.tight_layout()
plt.savefig(output_file)
plt.close(fig)
print(f"Plot saved as {output_file}")
# Example usage in the main function:
if __name__ == "__main__":
# results_18 = './results_aug/non_augment'
# results_34 = './results_size/results_34'
results_50 = './results_size/results_50'
# res = {'res_18':results_18, 'res_34':results_34, 'res_50':results_50}
# compare_nsga2_results_all(res)
# results_non_aug = './results_aug/non_augment'
# results_aug = './results_aug/augment'
res = {'without augment': results_50}
# compare_nsga2_results_all(res)
# Compute variable importance
importance = calculate_variable_importance(res)
# Print the importance results
print("Variable Importance:")
for metric, vars in importance.items():
print(f"\n{metric}:")
for var, stats in vars.items():
print(f" {var}: F-value = {stats['f_value']:.4f}, p-value = {stats['p_value']:.4f}")
# Visualize the importance
plot_variable_importance(importance)
# Load data
results_non_aug = './results_size/results_34'
data = load_data(results_non_aug)
# Visualize data for standard accuracy
visualize_data_with_metrics(data, 'test_standard_acc', 'standard_acc_visualization.png')
# Visualize data for robust accuracy
visualize_data_with_metrics(data, 'test_robust_acc', 'robust_acc_visualization.png')
================================================
FILE: transopt/ResultAnalysis/AnalysisBase.py
================================================
import abc
import json
from collections import defaultdict
from dataclasses import dataclass
from typing import Dict, Hashable, List, Tuple, Union
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from transopt.KnowledgeBase import KnowledgeBase
from transopt.utils.serialization import (convert_np_to_bulidin,
output_to_ndarray,
vectors_to_ndarray)
@dataclass
class Result:
"""
Class to store the results of the analysis.
"""
X: np.ndarray = None
Y: np.ndarray = None
best_X: np.ndarray = None
best_Y: np.ndarray = None
class AnalysisBase(abc.ABC):
def __init__(self, exper_folder, methods, seeds, tasks, start = 0, end = None):
self._exper_folder = exper_folder
self._methods = methods
self._seeds = seeds
self._tasks = tasks
self._init = start
self._end = end
self.results = {}
self._task_names = set()
self._colors = self.assign_colors_to_methods()
def read_data_from_kb(self):
for method in self._methods:
self.results[method] = defaultdict(dict)
for seed in self._seeds:
self.results[method][seed] = defaultdict(dict)
file_path = f'{self._exper_folder}/{method}/{seed}_KB.json'
database = KnowledgeBase(file_path)
for dataset_id in database.get_all_dataset_id():
dataset = database.get_dataset_by_id(dataset_id)
task_name = dataset['name']
if task_name.split('_')[0] not in self._tasks:
continue
input_vector = dataset['input_vector']
output_value = dataset['output_value']
r = Result()
r.X = vectors_to_ndarray(dataset['dataset_info']['variable_name'], input_vector)
r.Y = output_to_ndarray(output_value)
if self._end is not None:
r.X = r.X[:self._end]
r.Y = r.Y[:self._end]
else:
assert len(r.Y) == len(r.X)
self._end = len(r.Y)
best_id = np.argmin(r.Y)
r.best_Y = r.Y[best_id]
r.best_X = r.X[best_id]
self.results[method][seed][task_name] = r
self._task_names.add(task_name)
def save_results_to_json(self, file_path):
with open(file_path, 'w') as f:
json.dump(self.results, f, default=convert_np_to_bulidin)
def load_results_from_json(self, file_path):
def convert(dct):
if 'type' in dct and dct['type'] == 'ndarray':
return np.array(dct['value'])
return dct
with open(file_path, 'r') as f:
self.results = json.load(f, object_hook=convert)
def get_results_by_order(self, order=None):
"""
Get results from the nested dictionary based on the specified order.
Args:
order (list, optional): The order in which results should be organized.
Defaults to ["task", "method", "seed"].
Returns:
dict: A dictionary of results organized according to the specified order.
"""
if order is None:
order = ["task", "method", "seed"]
valid_keys = {"task", "method", "seed"}
assert len(order) == 3 and set(order) == valid_keys, "Order must be a permutation of 'task', 'method', and 'seed'"
# Retrieve the corresponding category based on the type of the key
def get_key(key):
if key == 'task':
return self._task_names
elif key == 'method':
return self._methods
elif key == 'seed':
return self._seeds
# Retrieve the corresponding data from the existing results.
def get_from_original_results(key_list):
first_original_key = key_list[order.index('method')]
second_original_key = key_list[order.index('seed')]
third_original_key = key_list[order.index('task')]
return self.results[first_original_key][second_original_key][third_original_key]
# Define dictionaries for each level of order
levels = {key: get_key(key) for key in order}
new_results = {}
for first_key in levels[order[0]]:
new_results[first_key] = defaultdict(dict)
for second_key in levels[order[1]]:
new_results[first_key][second_key] = defaultdict(dict)
for third_key in levels[order[2]]:
new_results[first_key][second_key][third_key] = get_from_original_results(
[first_key, second_key, third_key])
return new_results
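The reordering in `get_results_by_order` is a pure key permutation and can be sketched independently of the class, assuming the storage layout `results[method][seed][task]` produced by `read_data_from_kb`. The helper name `reorder` is illustrative:

```python
# Toy sketch of the key permutation performed by get_results_by_order,
# assuming a nested dict laid out as results[method][seed][task].
def reorder(results, order):
    # order is a permutation of ("task", "method", "seed").
    out = {}
    for m, seeds in results.items():
        for s, tasks in seeds.items():
            for t, value in tasks.items():
                by_axis = {"method": m, "seed": s, "task": t}
                k1, k2, k3 = (by_axis[a] for a in order)
                out.setdefault(k1, {}).setdefault(k2, {})[k3] = value
    return out

results = {"BO": {0: {"Ackley": 1.5}}, "TMTGP": {0: {"Ackley": 0.9}}}
print(reorder(results, ("task", "method", "seed")))
# {'Ackley': {'BO': {0: 1.5}, 'TMTGP': {0: 0.9}}}
```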
def assign_colors_to_methods(self):
"""
Assign a unique color from Matplotlib's 'tab10' color cycle to each method.
Args:
methods (list): A list of method names.
Returns:
dict: A dictionary where keys are method names and values are their assigned colors.
"""
# Using the 'tab10' color cycle from Matplotlib
rgb_colors = [
(141, 211, 199),
(255, 255, 179),
(190, 186, 218),
(251, 128, 114),
(128, 177, 211),
(253, 180, 98),
(179, 222, 105),
(252, 205, 229),
(217, 217, 217),
(188, 128, 189),
(204, 235, 197)
]
color_strings = []
for rgb in rgb_colors:
color_str = f"rgb,255:red,{rgb[0]}; green,{rgb[1]}; blue,{rgb[2]}"
color_strings.append(color_str)
# Creating a dictionary to store method names and their assigned colors
method_colors = {}
for i, method in enumerate(self._methods):
color_index = i % len(color_strings) # Cycle through colors if there are more methods than colors
color = color_strings[color_index]
method_colors[method] = color
return method_colors
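The cyclic assignment above (methods beyond the palette length wrap around to its start) reduces to a small dict comprehension; a stand-alone sketch with illustrative names:

```python
# Minimal sketch of the cyclic color assignment: method i gets palette
# entry i modulo the palette length.
def assign_colors(methods, palette):
    return {m: palette[i % len(palette)] for i, m in enumerate(methods)}

print(assign_colors(['BO', 'MHGP', 'TMTGP'], ['red', 'blue']))
# {'BO': 'red', 'MHGP': 'blue', 'TMTGP': 'red'}
```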
def get_color_for_method(self, method:Union[List,str]):
"""
Get the color(s) associated with a specific method or a list of methods.
Args:
method (str or list): The name of the method or a list of method names.
Returns:
str or list: The TikZ color string(s) associated with the method(s).
"""
if isinstance(method, str):
if method not in self._colors:
raise ValueError(f"Method {method} not found in colors dictionary")
return self._colors[method]
elif isinstance(method, list):
colors = []
for m in method:
if m not in self._colors:
raise ValueError(f"Method {m} not found in colors dictionary")
colors.append(self._colors[m])
return colors
else:
raise TypeError("Input must be a string or a list of strings")
def get_methods(self):
"""
Get the list of methods used in the analysis.
Returns:
list: A list of method names.
"""
return self._methods
def get_task_names(self):
"""
Get the list of task names used in the analysis.
Returns:
set: A set of task names.
"""
return self._task_names
def get_seeds(self):
"""
Get the list of seeds used in the analysis.
Returns:
list: A list of seeds.
"""
return self._seeds
================================================
FILE: transopt/ResultAnalysis/AnalysisPipeline.py
================================================
from transopt.ResultAnalysis.PlotAnalysis import plot_registry
from transopt.ResultAnalysis.TableAnalysis import table_registry
from transopt.ResultAnalysis.TrackOptimization import track_registry
from transopt.ResultAnalysis.AnalysisBase import AnalysisBase
from transopt.ResultAnalysis.AnalysisReport import create_report
def analysis_pipeline(Exper_folder, tasks, methods, seeds, args):
ab = AnalysisBase(Exper_folder, tasks=tasks, methods=methods, seeds=seeds)
ab.read_data_from_kb()
Exper_folder = Exper_folder / 'analysis'
if args.comparision:
for plot_name, plot_func in plot_registry.items():
plot_func(ab, Exper_folder)  # extend here if the plot function needs extra arguments
for table_name, table_func in table_registry.items():
table_func(ab, Exper_folder)  # extend here if the table function needs extra arguments
if args.track:
pass
if args.report:
create_report(Exper_folder)
================================================
FILE: transopt/ResultAnalysis/AnalysisReport.py
================================================
import os
from pdf2image import convert_from_path
from transopt.ResultAnalysis.ReportNote import Notes
def pdf_to_png(pictures_path):
assert os.path.exists(pictures_path), "Folder 'Pictures' does not exist!"
pdf_files = [f for f in os.listdir(pictures_path) if f.endswith('.pdf')]
pictures = []
for pdf_file in pdf_files:
pdf_path = os.path.join(pictures_path, pdf_file)
images = convert_from_path(pdf_path, dpi=1000, fmt='png')
for image in images:
image.save(os.path.join(pictures_path, f"{pdf_file.split('.')[0]}.png"), 'png')
pictures.append(pdf_file.split('.')[0])
return pictures
def create_details_report(details_folders, save_path):
for details_folder in details_folders:
html_begin = f"""
{details_folder.title().replace('_', ' ')}
"""
html_begin += """
"""
html_begin += f"""
{details_folder.title().replace('_', ' ')}
Back
"""
html_end = """
"""
pictures_path = save_path / details_folder
pictures = pdf_to_png(pictures_path)
function_name = set()
html_content = """"""
for picture in pictures:
# Group pictures of the same function into one category
if picture.split('_')[0] not in function_name:
if len(function_name) != 0:
html_content += """
"""
function_name.add(picture.split('_')[0])
html_content += f"""
Tables
"""
folders = [f for f in os.listdir(save_path) if os.path.isdir(os.path.join(save_path, f))]
details_folders = [f for f in folders if f != 'Overview']
if len(details_folders) != 0:
create_details_report(details_folders, save_path)
report_container = """"""
for details_folder in details_folders:
report_container += f"""
{details_folder.title().replace('_', ' ')}
"""
report_container += """
"""
html_content += report_container
with open(save_path / 'Report.html', 'w', encoding='utf-8') as html:
html.write(html_begin + html_content + html_end)
================================================
FILE: transopt/ResultAnalysis/CasualAnalysis.py
================================================
from transopt.ResultAnalysis.PlotAnalysis import plot_registry
from transopt.ResultAnalysis.TableAnalysis import table_registry
from transopt.ResultAnalysis.AnalysisBase import AnalysisBase
from transopt.ResultAnalysis.AnalysisReport import create_report
def casual_analysis(Exper_folder, tasks, methods, seeds, args):
ab = AnalysisBase(Exper_folder, tasks=tasks, methods=methods, seeds=seeds)
ab.read_data_from_kb()
Exper_folder = Exper_folder / 'analysis'
================================================
FILE: transopt/ResultAnalysis/CompileTex.py
================================================
import os
import subprocess
import shutil
def compile_tex(tex_path, output_folder):
# Save the current working directory
original_cwd = os.getcwd()
# Convert the paths to absolute paths
tex_path = os.path.abspath(tex_path)
output_folder = os.path.abspath(output_folder)
# Split into folder path and file name
folder, filename = os.path.split(tex_path)
name, _ = os.path.splitext(filename)
# Define pdf_path before the try block so the finally block can reference it even if pdflatex fails
pdf_path = os.path.join(folder, name + '.pdf')
# Change into the folder containing the .tex file
os.chdir(folder)
try:
# Compile the .tex file
subprocess.run(['pdflatex', filename], check=True)
# Crop the PDF file
cropped_pdf_path = pdf_path.replace('.pdf', '-crop.pdf')
subprocess.run(['pdfcrop', pdf_path, cropped_pdf_path], check=True)
# Move the cropped PDF into the output folder, dropping the -crop suffix
output_pdf_path = os.path.join(output_folder, name + '.pdf')
shutil.move(cropped_pdf_path, output_pdf_path)
except subprocess.CalledProcessError as e:
print(f"命令执行失败: {e}")
finally:
# Restore the original working directory
os.chdir(original_cwd)
# Remove the .aux and .log files and the uncropped PDF
aux_path = os.path.join(folder, name + '.aux')
log_path = os.path.join(folder, name + '.log')
if os.path.exists(aux_path):
os.remove(aux_path)
if os.path.exists(log_path):
os.remove(log_path)
if os.path.exists(pdf_path):
os.remove(pdf_path)
================================================
FILE: transopt/ResultAnalysis/CorrelationAnalysis.py
================================================
import numpy as np
import dcor
from sklearn.metrics import mutual_info_score
from transopt.ResultAnalysis.AnalysisBase import AnalysisBase
from transopt.utils.Normalization import normalize
def correlation_analysis(Exper_folder, tasks, methods, seeds, args):
ab = AnalysisBase(Exper_folder, tasks=tasks, methods=methods, seeds=seeds)
ab.read_data_from_kb()
task_names = ab.get_task_names()
for method in methods:
for seed in seeds:
for task in task_names:
a = MutualInformation(ab, task, method, seed)
Exper_folder = Exper_folder / 'analysis'
def MutualInformation(ab:AnalysisBase, dataset_name, method, seed):
results = ab.get_results_by_order(['method', 'seed', 'task'])
res = results[method][seed][dataset_name]
Y = res.Y
num_objective = Y.shape[0]
mi = mutual_info_score(normalize(Y[0]), normalize(Y[1]))
distance_corr = dcor.distance_correlation(normalize(Y[0]), normalize(Y[1]))
print("Distance Correlation:", distance_corr)
print("Mutual Information:", mi)
================================================
FILE: transopt/ResultAnalysis/MakeGif.py
================================================
import os
from PIL import Image
def make_gif(folder_path):
# Collect all PNG files in the folder
image_files = [file for file in os.listdir(folder_path) if file.endswith('.png')]
# Sort the images by their leading index
image_files.sort(key=lambda x: int(x.split('_')[0]))
images = []
for file in image_files:
# Read each image file
image_path = os.path.join(folder_path, file)
image = Image.open(image_path)
# Append the image to the list
images.append(image)
# Path and name for the output GIF
gif_path = os.path.join(folder_path, 'animation.gif')
# Save the image list as an animated GIF
images[0].save(gif_path, save_all=True, append_images=images[1:], duration=1000, loop=0)
if __name__ == '__main__':
task_list_2d = [
'Ackley_10_s',
# 'StyblinskiTang_10_s',
'MPB5_10_s',
'LevyR_10_s',
# 'SVM_10_s',
]
task_list_5d = [
# 'Ackley_10_s',
# 'StyblinskiTang_10_s',
# 'MPB5_10_s',
# 'LevyR_10_s',
'NN_72_s',
]
task_list_8d = [
# 'Ackley_10_s',
# 'StyblinskiTang_10_s',
# 'MPB5_10_s',
'LevyR_10_s',
# 'XGB_10_s',
]
Dim_ = 2
Method_list = [
# 'INC_MHGP',
# 'WS_RGPE',
# 'MT_MOGP',
# 'LFL_MOGP',
# 'ELLA_GP',
# 'BO_GP',
'TMTGP'
]
# Seed_list = list(range(10))
Seed_list = [0]
Exp_name = 'test5'
Exper_floder = '../../LFL_experiments/{}'.format(Exp_name)
if Dim_ == 2:
task_list = task_list_2d
elif Dim_ == 5:
task_list = task_list_5d
elif Dim_ == 8:
task_list = task_list_8d
for Method in Method_list:
for Prob in task_list:
for seed in Seed_list:
for i in range(int(Prob.split('_')[1])):
make_gif(Exper_floder+f"/figs/contour/{Method}/{seed}/{Prob.split('_')[0]}_{i}_{Prob.split('_')[2]}")
================================================
FILE: transopt/ResultAnalysis/PFAnalysis.py
================================================
from transopt.ResultAnalysis.AnalysisBase import AnalysisBase
from transopt.ResultAnalysis.CorrelationAnalysis import MutualInformation
def parego_analysis(Exper_folder, tasks, methods, seeds, args):
ab = AnalysisBase(Exper_folder, tasks=tasks, methods=methods, seeds=seeds)
ab.read_data_from_kb()
task_names = ab.get_task_names()
for method in methods:
for seed in seeds:
for task in task_names:
a = MutualInformation(ab, task, method, seed)
Exper_folder = Exper_folder / 'analysis'
================================================
FILE: transopt/ResultAnalysis/PlotAnalysis.py
================================================
import numpy as np
from collections import Counter, defaultdict
from transopt.ResultAnalysis.AnalysisBase import AnalysisBase
import matplotlib.pyplot as plt
from matplotlib.pyplot import MultipleLocator
import pandas as pds
import os
import seaborn as sns
from transopt.utils.sk import Rx
from pathlib import Path
import scipy
import tikzplotlib
from sklearn.cluster import DBSCAN
from transopt.ResultAnalysis.CompileTex import compile_tex
import matplotlib.gridspec as gridspec
import re

plot_registry = {}

# Decorator for registering plot functions
def plot_register(name):
def decorator(func_or_class):
if name in plot_registry:
raise ValueError(f"Error: '{name}' is already registered.")
plot_registry[name] = func_or_class
return func_or_class
return decorator
@plot_register('sk')
def plot_sk(ab:AnalysisBase, save_path:Path):
cr_results = {}
results = ab.get_results_by_order(["task", "method", "seed"])
for task_name, tasks_r in results.items():
result = {}
for method, method_r in tasks_r.items():
cr_list = []
for seed, result_obj in method_r.items():
cr = result_obj.best_Y
cr_list.append(cr)
result[method] = cr_list
a = Rx.data(**result)
RES = Rx.sk(a)
for r in RES:
if r.rx in cr_results:
cr_results[r.rx].append(r.rank)
else:
cr_results[r.rx] = [r.rank]
df = pds.DataFrame(cr_results)
sns.set_theme(style="whitegrid", font='FreeSerif')
plt.figure(figsize=(12, 7.3))
# plt.ylim(bottom=0.9, top=len(method_names)+0.1)
ax = plt.gca()  # get the current axes object
y_major_locator = MultipleLocator(1)  # set the major tick interval
ax.yaxis.set_major_locator(y_major_locator)  # apply it to the y-axis
sns.violinplot(data=df, inner="quart")
plt.title('Scott-Knott', fontsize=30, y=1.01)
plt.xlabel('Algorithm Name', fontsize=25, labelpad=-7)
plt.ylabel('Rank', fontsize=25)
plt.yticks(fontsize=20)
plt.xticks(fontsize=20, rotation=10)
save_path = Path(save_path / 'Overview')
pdf_path = Path(save_path / 'Pictures')
tex_path = Path(save_path / 'tex')
save_path.mkdir(parents=True, exist_ok=True)
pdf_path.mkdir(parents=True, exist_ok=True)
tex_path.mkdir(parents=True, exist_ok=True)
tikzplotlib.save(tex_path / "scott_knott.tex")
with open(tex_path / "scott_knott.tex", 'r', encoding='utf-8') as f:
content = f.read()
# Add the preamble and \end{document}
preamble = r"\documentclass{article}" + "\n" + \
r"\usepackage{pgfplots}" + "\n" + \
r"\usepackage{tikz}" + "\n" + \
r"\begin{document}" + "\n" + \
r"\pagestyle{empty}" + "\n"
end_document = r"\end{document}" + "\n"
# Replace false with true to re-enable major ticks
content = re.sub(r'majorticks=false', 'majorticks=true', content)
pattern = r'axis line style={lightgray204},\n'
content = re.sub(pattern, '', content)
# Insert font-size controls
insert_text = r"font=\large," + "\n" + \
r"tick label style={font=\small}," + "\n" + \
r"label style={font=\normalsize}," + "\n"
insert_position = content.find(r'tick align=outside,')
modified_content = content[:insert_position] + insert_text + content[insert_position:]
# Write the modified content back to the file
with open(tex_path / "scott_knott.tex", 'w', encoding='utf-8') as f:
f.write(preamble + modified_content + end_document)
compile_tex(tex_path / "scott_knott.tex", pdf_path)
plt.close()
@plot_register('cr')
def convergence_rate(ab:AnalysisBase, save_path:Path, **kwargs):
cr_list = []
cr_all = {}
cr_results = {}
def acc_iter(Y, anchor_value):
for i in range(1, len(Y)):
best_fn = np.min(Y[:i])
if best_fn <= anchor_value:
return i/len(Y)
return 1
results = ab.get_results_by_order(["method", "seed", "task"])
best_Y_values = defaultdict(list)
# Walk the results dict and collect the best_Y values
for method, tasks in results.items():
for seed, task_seed in tasks.items():
for task_name, result_obj in task_seed.items():
best_Y = result_obj.best_Y
if best_Y is not None:
best_Y_values[task_name].append(best_Y)
# Compute the 75th percentile of the best_Y values for each task_name
quantiles = {task_name: np.percentile(values, 75) for task_name, values in best_Y_values.items()}
for method, tasks in results.items():
cr_list = []  # reset per method so each method gets its own list
for seed, task_seed in tasks.items():
for task_name, result_obj in task_seed.items():
Y = result_obj.Y
if Y is None:
raise ValueError(f"Y is not set for method {method}, task {task_name}")
cr = acc_iter(Y, anchor_value=quantiles[task_name])
cr_list.append(cr)
cr_all[method] = cr_list
a = Rx.data(**cr_all)
RES = Rx.sk(a)
for r in RES:
if r.rx in cr_results:
cr_results[r.rx].append(r.rank)
else:
cr_results[r.rx] = [r.rank]
cr_results = pds.DataFrame(cr_results)
sns.set_theme(style="whitegrid", font='FreeSerif')
plt.figure(figsize=(12, 7.3))
# plt.ylim(bottom=0.9, top=len(method_names)+0.1)
ax = plt.gca()  # get the current axes object
y_major_locator = MultipleLocator(1)  # set the major tick interval
ax.yaxis.set_major_locator(y_major_locator)  # apply it to the y-axis
sns.violinplot(data=cr_results, inner="quart")
plt.title('Convergence Rate', fontsize=30, y=1.01)
plt.xlabel('Algorithm Name', fontsize=25, labelpad=-7)
plt.ylabel('Rate', fontsize=25)
plt.yticks(fontsize=20)
plt.xticks(fontsize=20, rotation=10)
save_path = Path(save_path / 'Overview')
pdf_path = Path(save_path / 'Pictures')
tex_path = Path(save_path / 'tex')
save_path.mkdir(parents=True, exist_ok=True)
pdf_path.mkdir(parents=True, exist_ok=True)
tex_path.mkdir(parents=True, exist_ok=True)
tikzplotlib.save(tex_path / "convergence_rate.tex")
with open(tex_path / "convergence_rate.tex", 'r', encoding='utf-8') as f:
content = f.read()
# Add the preamble and \end{document}
preamble = r"\documentclass{article}" + "\n" + \
r"\usepackage{pgfplots}" + "\n" + \
r"\usepackage{tikz}" + "\n" + \
r"\begin{document}" + "\n" + \
r"\pagestyle{empty}" + "\n"
end_document = r"\end{document}" + "\n"
# Replace false with true to re-enable major ticks
content = re.sub(r'majorticks=false', 'majorticks=true', content)
pattern = r'axis line style={lightgray204},\n'
content = re.sub(pattern, '', content)
# Insert font-size controls
insert_text = r"font=\large," + "\n" + \
r"tick label style={font=\small}," + "\n" + \
r"label style={font=\normalsize}," + "\n"
insert_position = content.find(r'tick align=outside,')
modified_content = content[:insert_position] + insert_text + content[insert_position:]
# Write the modified content back to the file
with open(tex_path / "convergence_rate.tex", 'w', encoding='utf-8') as f:
f.write(preamble + modified_content + end_document)
compile_tex(tex_path / "convergence_rate.tex", pdf_path)
plt.close()
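`acc_iter` above is the core of this metric: the fraction of the evaluation budget spent before the running best first reaches the anchor value. A stand-alone version without NumPy behaves the same:

```python
# Standalone sketch of the acc_iter convergence metric: the fraction of the
# budget consumed before the running best first reaches the anchor.
def acc_iter(Y, anchor_value):
    for i in range(1, len(Y)):
        # min(Y[:i]) is the best value seen in the first i evaluations.
        if min(Y[:i]) <= anchor_value:
            return i / len(Y)
    return 1  # anchor never reached: full budget consumed

print(acc_iter([5, 4, 3, 2, 1], anchor_value=3))  # 0.6: reached at step 3 of 5
```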
def save_traj_data(ab, save_path):
# Group the results by task name
results = ab.get_results_by_order(["task", "method", "seed"])
for task_name, tasks_r in results.items():
# Create a dictionary to store this task's data
task_data = {}
for method, method_r in tasks_r.items():
res = []
for seed, result_obj in method_r.items():
Y = result_obj.Y
if Y is not None:
res.append(np.minimum.accumulate(Y).flatten())
if res:
# Compute the median and standard deviation
res_array = np.array(res)
median = np.median(res_array, axis=0)
std = np.std(res_array, axis=0)
# Store the statistics in the dictionary (the *_mean key actually holds the median)
task_data[f'{method}_mean'] = median
task_data[f'{method}_low'] = median - std
task_data[f'{method}_high'] = median + std
if task_data:
# Create the save path
os.makedirs(save_path / 'traj' / 'tex', exist_ok=True)
# Set the file path
file_path = save_path / 'traj' / 'tex' / f"{task_name}.dat"
# Starting index for the id column
start_idx = ab._init
# Keep only the data from start_idx onward
end_idx = start_idx + len(median)
for key in task_data.keys():
task_data[key] = task_data[key][start_idx:end_idx]
# Write the data to the file
with open(file_path, 'w') as f:
# Write the column names
col_names = ' '.join(['id'] + list(task_data.keys()))
f.write(col_names + '\n')
# Write the data rows
for i in range(len(task_data[list(task_data.keys())[0]])):
row_data = ' '.join([str(start_idx + i)] + [f'{x[i]:0.8f}' for x in task_data.values()])
f.write(row_data + '\n')
print(f"Data saved for {task_name}")
@plot_register('traj')
def traj2latex(ab: AnalysisBase, save_path: Path):
# Fetch the task and method names from the ab object
save_traj_data(ab, save_path)
results = ab.get_results_by_order(["task", "method", "seed"])
methods = ab.get_methods()
# Fetch the start and end indices from the ab object
start_idx = ab._init
end_idx = ab._end
# Create the save path
os.makedirs(save_path / 'traj' / 'tex', exist_ok=True)
# Set the file paths
for task_name, tasks_r in results.items():
all_data = []
for method, method_r in tasks_r.items():
for seed, result_obj in method_r.items():
Y = result_obj.Y
if Y is not None:
min_values = np.minimum.accumulate(Y)
all_data.append(min_values.flatten())
if all_data:
all_data = np.concatenate(all_data)
y_min = np.min(all_data) - np.std(all_data)
y_max = np.max(all_data) + np.std(all_data)
tex_save_path = save_path / 'traj' / 'tex' / f"{task_name}.tex"
data_file = f"{task_name}.dat"
# Start building the LaTeX code
latex_code = f"""
\\documentclass{{article}}
\\usepackage{{pgfplots}}
\\usepackage{{tikz}}
\\usetikzlibrary{{intersections}}
\\usepackage{{helvet}}
\\usepackage[eulergreek]{{sansmath}}
\\usepgfplotslibrary{{fillbetween}}
\\begin{{document}}
\\pagestyle{{empty}}
\\pgfplotsset{{compat=1.12,every axis/.append style={{
font = \\large,
grid = major,
xlabel = {{\\# of FEs}},
ylabel = {{$f(\\mathbf{{x}}^\\ast)$}},
thick,
xmin={start_idx},
xmax={end_idx}, % Adjust as needed
ymin={y_min},
ymax={y_max},
line width = 1pt,
tick style = {{line width = 0.8pt}}
}}}}
\\pgfplotsset{{every plot/.append style={{very thin}}}}
\\begin{{tikzpicture}}
\\begin{{axis}}[
title={{${task_name}$}},
width=\\textwidth,
height=0.5\\textwidth,
]"""
for method in methods:
# Adjust this to match the structure of your data file
latex_code += f"""
\\addplot[color={{{ab.get_color_for_method(method)}}}, solid, line width=1pt]table [x = id, y = {method}_mean]{{{data_file}}};
\\addlegendentry{{{method}}};
"""
for method in methods:
# Adjust this to match the structure of your data file
latex_code += f"""
\\addplot[color={{{ab.get_color_for_method(method)}}}, name path={method}_L, draw=none] table[x = id, y = {method}_low] {{{data_file}}};
\\addplot[color={{{ab.get_color_for_method(method)}}}, name path={method}_U, draw=none] table[x = id, y = {method}_high] {{{data_file}}};
\\addplot[color={{{ab.get_color_for_method(method)}}},opacity=0.3] fill between[of={method}_U and {method}_L];
"""
latex_code += f"""
\\end{{axis}}
\\end{{tikzpicture}}
\\end{{document}}"""
# Save the LaTeX code to a file
with open(tex_save_path, 'w') as f:
f.write(latex_code)
try:
compile_tex(tex_save_path, save_path / 'traj')
except Exception:
pass
print(f"LaTeX code has been saved to {tex_save_path}")
@plot_register('violin')
def plot_violin(ab:AnalysisBase, save_path, **kwargs):
data = {'Method': [], 'Performance rank': []}
method_names = set()
results = ab.get_results_by_order(["task", "seed", "method"])
for task_name, task_r in results.items():
for seed, seed_r in task_r.items():
res = {}
for method, result_obj in seed_r.items():
method_names.add(method)
Y = result_obj.Y
if Y is not None:
min_values = np.min(Y)
res[method] = min_values
sorted_value = sorted(res.values())
for v_id, v in enumerate(sorted_value):
for k, vv in res.items():
if v == vv:
data['Method'].append(k)
data['Performance rank'].append(v_id+1)
sns.set_theme(style="whitegrid", font='FreeSerif')
plt.figure(figsize=(12, 7.3))
plt.ylim(bottom=0.9, top=len(method_names)+0.1)
ax = plt.gca()  # get the current axes object
y_major_locator = MultipleLocator(1)  # set the major tick interval
ax.yaxis.set_major_locator(y_major_locator)  # apply it to the y-axis
sns.violinplot(x='Method', y='Performance rank', data=data,
order=list(method_names),
inner="box", color="silver", cut=0, linewidth=3)
plt.title('Violin plot', fontsize=30, y=1.01)
plt.xlabel('Algorithm Name', fontsize=25, labelpad=-7)
plt.ylabel('Performance rank', fontsize=25)
plt.yticks(fontsize=20)
plt.xticks(fontsize=20, rotation=10)
save_path = Path(save_path / 'Overview')
pdf_path = Path(save_path / 'Pictures')
tex_path = Path(save_path / 'tex')
save_path.mkdir(parents=True, exist_ok=True)
pdf_path.mkdir(parents=True, exist_ok=True)
tex_path.mkdir(parents=True, exist_ok=True)
tikzplotlib.save(tex_path / "violin.tex")
with open(tex_path / "violin.tex", 'r', encoding='utf-8') as f:
content = f.read()
# Add the preamble and \end{document}
preamble = r"\documentclass{article}" + "\n" + \
r"\usepackage{pgfplots}" + "\n" + \
r"\usepackage{tikz}" + "\n" + \
r"\begin{document}" + "\n" + \
r"\pagestyle{empty}" + "\n"
end_document = r"\end{document}" + "\n"
# Replace false with true to re-enable major ticks
content = re.sub(r'majorticks=false', 'majorticks=true', content)
pattern = r'axis line style={lightgray204},\n'
content = re.sub(pattern, '', content)
# Insert font-size controls
insert_text = r"font=\large," + "\n" + \
r"tick label style={font=\small}," + "\n" + \
r"label style={font=\normalsize}," + "\n"
insert_position = content.find(r'tick align=outside,')
modified_content = content[:insert_position] + insert_text + content[insert_position:]
# Write the modified content back to the file
with open(tex_path / "violin.tex", 'w', encoding='utf-8') as f:
f.write(preamble + modified_content + end_document)
compile_tex(tex_path / "violin.tex", pdf_path)
plt.close()
@plot_register('box')
def plot_box(ab:AnalysisBase, save_path, **kwargs):
if 'mode' in kwargs:
mode = kwargs['mode']
else:
mode = 'median'
# Keep methods as an ordered list so the DataFrame columns line up with result_list
methods = []
results = ab.get_results_by_order(["method", "task", "seed"])
result_list = []
for method, method_r in results.items():
methods.append(method)
result = []
for task, task_r in method_r.items():
best = []
for seed, result_obj in task_r.items():
if result_obj is not None:
Y = result_obj.Y
if Y is not None:
min_values = np.min(Y)
best.append(min_values)
if mode == 'median':
result.append(np.median(best))
elif mode == 'mean':
result.append(np.mean(best))
result_list.append(result)
result_list = np.array(result_list).T
ranks = np.array([scipy.stats.rankdata(x, method='min') for x in result_list])
df = pds.DataFrame(ranks, columns=methods)
sns.set_theme(style='whitegrid', font='FreeSerif')
plt.figure(figsize=(12, 8))
ax = plt.gca()
y_major_locator = MultipleLocator(1)
ax.yaxis.set_major_locator(y_major_locator)
sns.boxplot(df, color='#c2d0e9')
plt.title('Box plot', fontsize=30, y=1.03)
plt.xlabel('Algorithm Name', fontsize=25)
plt.ylabel('Rank', fontsize=25)
plt.xticks(fontsize=20, rotation=10)
plt.yticks(fontsize=20)
save_path = Path(save_path / 'Overview')
pdf_path = Path(save_path / 'Pictures')
tex_path = Path(save_path / 'tex')
save_path.mkdir(parents=True, exist_ok=True)
pdf_path.mkdir(parents=True, exist_ok=True)
tex_path.mkdir(parents=True, exist_ok=True)
tikzplotlib.save(tex_path / "box.tex")
with open(tex_path / "box.tex", 'r', encoding='utf-8') as f:
content = f.read()
# Add the preamble and \end{document}
preamble = r"\documentclass{article}" + "\n" + \
r"\usepackage{pgfplots}" + "\n" + \
r"\usepackage{tikz}" + "\n" + \
r"\begin{document}" + "\n" + \
r"\pagestyle{empty}" + "\n"
end_document = r"\end{document}" + "\n"
content = re.sub(r'majorticks=false', 'majorticks=true', content)
pattern = r'axis line style={lightgray204},\n'
content = re.sub(pattern, '', content)
insert_text = r"font=\large," + "\n" + \
r"tick label style={font=\small}," + "\n" + \
r"label style={font=\normalsize}," + "\n"
insert_position = content.find(r'tick align=outside,')
modified_content = content[:insert_position] + insert_text + content[insert_position:]
# Write the modified content back to the file
with open(tex_path / "box.tex", 'w', encoding='utf-8') as f:
f.write(preamble + modified_content + end_document)
compile_tex(tex_path / "box.tex", pdf_path)
plt.close()
@plot_register('dbscan')
def dbscan_analysis(ab: AnalysisBase, save_path, **kwargs):
results = ab.get_results_by_order(['task', 'method', 'seed'])
tasks_names = set()
method_names = set()
result_of_n_clusters = {}
result_of_noise_points = {}
result_of_avg_cluster_size = {}
for task_name, task_r in results.items():
tasks_names.add(task_name)
result_of_n_clusters[task_name] = defaultdict(dict)
result_of_noise_points[task_name] = defaultdict(dict)
result_of_avg_cluster_size[task_name] = defaultdict(dict)
for method, method_r in task_r.items():
method_names.add(method)
result_of_n_clusters[task_name][method] = []
result_of_noise_points[task_name][method] = []
result_of_avg_cluster_size[task_name][method] = []
for seed, result_obj in method_r.items():
if result_obj is not None:
Y = result_obj.Y
X = result_obj.X
if Y is not None:
db = DBSCAN(eps=0.5, min_samples=5)
# Run the clustering
db.fit(X)
# Retrieve the cluster labels
labels = db.labels_
# Count the clusters (noise points, labeled -1, are excluded)
n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
cluster_sizes = Counter(labels)
noise_points = cluster_sizes[-1] # points labeled -1 are noise
# Compute the average cluster size (excluding noise points)
if n_clusters > 0:
t_size = 0
for ids, cs in cluster_sizes.items():
if ids >= 0:
t_size += cs
avg_cluster_size = t_size / n_clusters
else:
avg_cluster_size = 0
result_of_n_clusters[task_name][method].append(n_clusters)
result_of_noise_points[task_name][method].append(noise_points)
result_of_avg_cluster_size[task_name][method].append(avg_cluster_size)
def modify_lex_file(tex_path, pdf_path):
with open(tex_path, 'r', encoding='utf-8') as f:
content = f.read()
# Add the preamble and end-of-document markers
preamble = r"\documentclass{article}" + "\n" + \
r"\usepackage{pgfplots}" + "\n" + \
r"\usepackage{tikz}" + "\n" + \
r"\begin{document}" + "\n" + \
r"\pagestyle{empty}" + "\n"
end_document = r"\end{document}" + "\n"
content = re.sub(r'majorticks=false', 'majorticks=true', content)
pattern = r'axis line style={lightgray204},\n'
content = re.sub(pattern, '', content)
insert_text = r"font=\large," + "\n" + \
r"tick label style={font=\small}," + "\n" + \
r"label style={font=\normalsize}," + "\n"
insert_position = content.find(r'tick align=outside,')
modified_content = content[:insert_position] + insert_text + content[insert_position:]
# Write the modified content back to the file
with open(tex_path, 'w', encoding='utf-8') as f:
f.write(preamble + modified_content + end_document)
compile_tex(tex_path, pdf_path)
tex_path = save_path / 'dbscan' / 'tex'
pdf_path = save_path / 'dbscan'
save_path.mkdir(parents=True, exist_ok=True)
pdf_path.mkdir(parents=True, exist_ok=True)
tex_path.mkdir(parents=True, exist_ok=True)
for task_name in tasks_names:
df_n_clusters = pds.DataFrame(result_of_n_clusters[task_name])
sns.set_theme(style='whitegrid', font='FreeSerif')
plt.figure(figsize=(12, 8))
ax = plt.gca()
y_major_locator = MultipleLocator(1)
ax.yaxis.set_major_locator(y_major_locator)
sns.boxplot(data=df_n_clusters, order=method_names)
plt.title('Clusters', fontsize=30, y=1.03)
plt.ylabel('number', fontsize=25)
plt.xticks(fontsize=20, rotation=10)
plt.yticks(fontsize=20)
tikzplotlib.save(tex_path / f"{task_name}_n_clusters.tex")
modify_lex_file(tex_path / f"{task_name}_n_clusters.tex", pdf_path)
plt.close()
df_noise_points = pds.DataFrame(result_of_noise_points[task_name])
plt.figure(figsize=(12, 8))
ax = plt.gca()
y_major_locator = MultipleLocator(1)
ax.yaxis.set_major_locator(y_major_locator)
sns.boxplot(data=df_noise_points, order=method_names)
plt.title('Noise Points', fontsize=30, y=1.03)
plt.ylabel('number', fontsize=25)
plt.xticks(fontsize=20, rotation=10)
plt.yticks(fontsize=20)
tikzplotlib.save(tex_path / f"{task_name}_noise_points.tex")
modify_lex_file(tex_path / f"{task_name}_noise_points.tex", pdf_path)
plt.close()
df_avg_cluster_size = pds.DataFrame(result_of_avg_cluster_size[task_name])
plt.figure(figsize=(12, 8))
ax = plt.gca()
y_major_locator = MultipleLocator(1)
ax.yaxis.set_major_locator(y_major_locator)
sns.boxplot(data=df_avg_cluster_size, order=method_names)
plt.title('Avg Cluster Size', fontsize=30, y=1.03)
plt.ylabel('number', fontsize=25)
plt.xticks(fontsize=20, rotation=10)
plt.yticks(fontsize=20)
tikzplotlib.save(tex_path / f"{task_name}_avg_cluster_size.tex")
modify_lex_file(tex_path / f"{task_name}_avg_cluster_size.tex", pdf_path)
plt.close()
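The cluster bookkeeping above (counting clusters, noise points labeled -1, and average cluster size) can be illustrated standalone; `summarize_labels` below is a hypothetical helper mirroring that logic on a plain label list:

```python
from collections import Counter

def summarize_labels(labels):
    """Summarize DBSCAN-style labels: -1 marks noise, other ints are cluster ids."""
    sizes = Counter(labels)
    noise = sizes.pop(-1, 0)  # noise points carry the label -1
    n_clusters = len(sizes)
    avg_size = sum(sizes.values()) / n_clusters if n_clusters else 0
    return n_clusters, noise, avg_size

print(summarize_labels([0, 0, 1, 1, 1, -1, -1]))  # (2, 2, 2.5)
```

The same three quantities are what the function collects per task, method, and seed before plotting.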
@plot_register('heatmap')
def plot_heatmap(ab:AnalysisBase, save_path, **kwargs):
results = ab.get_results_by_order(['method', 'task', 'seed'])
methods = ab.get_methods()
tasks = list(ab.get_task_names())
# Step 1: Calculate the best result for each method, task, and seed
best_results = {method: {task: [] for task in tasks} for method in methods}
for method in methods:
for task in tasks:
for seed, result_obj in results[method][task].items():
if result_obj is not None and result_obj.Y is not None:
best_results[method][task].append(result_obj.best_Y)
# Step 2: Calculate the mean of the best results for each method and task
mean_best_results = {method: {task: np.mean(best_results[method][task]) for task in tasks} for method in methods}
# Step 3: Create a dataframe for the heatmap
heatmap_data = pds.DataFrame(mean_best_results).T
# Step 4: Plot the heatmap
n_cols = len(tasks)
colormaps = ['Blues', 'Reds', 'Greens', 'Purples'] # Adjust as needed
fig = plt.figure(figsize=(n_cols * 2, 5))
gs = gridspec.GridSpec(1, n_cols, width_ratios=[1 for _ in range(n_cols)])
for col, task in enumerate(tasks):
ax = plt.subplot(gs[col])
sns.heatmap(heatmap_data[[task]], annot=True, cmap=colormaps[col % len(colormaps)], cbar=False, ax=ax, **kwargs)
if col == 0:
ax.set_ylabel('Method')
if col != 0:
ax.set_yticks([])
ax.set_yticklabels([])
plt.suptitle("Heatmap of Methods")
fig.text(0.5, 0.01, 'Task Name', ha='center')
plt.tight_layout(rect=[0, 0.03, 1, 0.99])
save_path = Path(save_path / 'Overview')
png_path = Path(save_path / 'Pictures')
save_path.mkdir(parents=True, exist_ok=True)
png_path.mkdir(parents=True, exist_ok=True)
plt.savefig(png_path/'heatmap.png', format='png')
================================================
FILE: transopt/ResultAnalysis/ReportNote.py
================================================
# There are some explanation about figures and tables
Notes = {
'box': 'The box plot compares the performance of different algorithms across all problems, primarily '
'displaying the minimum value, the first quartile, the third quartile, and the maximum value.',
'violin': 'The violin plot compares the performance of different algorithms across all problems, combining '
'elements of kernel density estimation and box plots to provide a more detailed view of data '
'distribution.',
'convergence_rate': 'Convergence rate refers to the speed at which an optimization algorithm converges to a '
'solution when addressing a problem. A higher convergence rate is often seen as an '
'advantage in algorithm performance.',
'scott_knott': '123',
'compare_convergence_rate': '321',
'compare_mean': '111',
}
================================================
FILE: transopt/ResultAnalysis/TableAnalysis.py
================================================
import numpy as np
from collections import defaultdict
from transopt.utils.sk import Rx
import scipy
from transopt.ResultAnalysis.TableToLatex import matrix_to_latex
from transopt.ResultAnalysis.AnalysisBase import AnalysisBase
from transopt.ResultAnalysis.CompileTex import compile_tex
import os
table_registry = {}
# Decorator for registering functions
def Tabel_register(name):
def decorator(func_or_class):
if name in table_registry:
raise ValueError(f"Error: '{name}' is already registered.")
table_registry[name] = func_or_class
return func_or_class
return decorator
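The registry-decorator pattern above (also used by `plot_register` and `track_register`) can be exercised in isolation; the names `register` and `report_mean` below are hypothetical stand-ins:

```python
# Hypothetical mirror of the registry pattern used throughout ResultAnalysis.
registry = {}

def register(name):
    def decorator(func):
        if name in registry:
            raise ValueError(f"Error: '{name}' is already registered.")
        registry[name] = func
        return func
    return decorator

@register('mean')
def report_mean(values):
    return sum(values) / len(values)

# Callers look functions up by name rather than importing them directly:
print(registry['mean']([1, 2, 3]))  # 2.0
```

Registering the same name twice raises `ValueError`, which keeps table and plot names unique across modules.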
@Tabel_register('mean')
def record_mean_std(ab:AnalysisBase, save_path, **kwargs):
# Similar to record_mean_std function in PeerComparison.py
res_mean = {}
res_std = {}
res_sig = {}
results = ab.get_results_by_order(["task", "method", "seed"])
for task_name, task_r in results.items():
result_mean = []
result_std = []
data = {}
data_mean = {}
for method, method_r in task_r.items():
best = []
for seed, result_obj in method_r.items():
best.append(result_obj.best_Y)
data[method] = best.copy()
data_mean[method] = (np.mean(best), np.std(best))
result_mean.append(np.mean(best))
result_std.append(np.std(best))
res_mean[task_name] = result_mean
res_std[task_name] = result_std
rst_m = {}
sorted_dic = sorted(data_mean.items(), key=lambda kv: (kv[1][0]))
for method in ab.get_methods():
if method == sorted_dic[0][0]:
rst_m[method] = '-'
continue
s, p = scipy.stats.mannwhitneyu(data[sorted_dic[0][0]], data[method], alternative='two-sided')
if p < 0.05:
rst_m[method] = '+'
else:
rst_m[method] = '-'
res_sig[task_name] = rst_m
latex_code = matrix_to_latex({'mean':res_mean, 'std':res_std, 'significance':res_sig}, list(ab.get_task_names()), list(ab.get_methods()),
caption='Performance comparisons of the quality of solutions obtained by different algorithms.')
save_path = save_path / 'Overview'
os.makedirs(save_path, exist_ok=True)
tex_save_path = save_path / 'tex'
os.makedirs(tex_save_path, exist_ok=True)
table_path = save_path / 'Table'
os.makedirs(table_path, exist_ok=True)
with open(tex_save_path / "compare_mean.tex", 'w') as f:
f.write(latex_code)
try:
compile_tex(tex_save_path / "compare_mean.tex", table_path)
except Exception:
pass
print(f"LaTeX code has been saved to {tex_save_path}")
@Tabel_register('cr')
def record_convergence_rate(ab:AnalysisBase, save_path, **kwargs):
# Similar to record_convergence function in PeerComparison.py
res_mean = {}
res_std = {}
res_sig = {}
def acc_iter(Y, anchor_value):
for i in range(1, len(Y)):
best_fn = np.min(Y[:i])
if best_fn <= anchor_value:
return i/len(Y)
return 1
# Walk the results and collect best_Y values
results = ab.get_results_by_order(["method", "seed", "task"])
best_Y_values = defaultdict(list)
for method, tasks in results.items():
for seed, task_seed in tasks.items():
for task_name, result_obj in task_seed.items():
best_Y = result_obj.best_Y
if best_Y is not None:
best_Y_values[task_name].append(best_Y)
# Compute the 75th percentile of the best_Y values for each task_name
quantiles = {task_name: np.percentile(values, 75) for task_name, values in best_Y_values.items()}
results = ab.get_results_by_order(["task", "method", "seed"])
for task_name, task_r in results.items():
result_mean = []
result_std = []
data = {}
data_mean = {}
for method, method_r in task_r.items():
best = []
for seed, result_obj in method_r.items():
Y = result_obj.Y
if Y is None:
raise ValueError(f"Y is not set for method {method}, task {task_name}")
cr = acc_iter(Y, anchor_value=quantiles[task_name])
best.append(cr)
data[method] = best.copy()
data_mean[method] = (np.mean(best), np.std(best))
result_mean.append(np.mean(best))
result_std.append(np.std(best))
res_mean[task_name] = result_mean
res_std[task_name] = result_std
rst_m = {}
sorted_dic = sorted(data_mean.items(), key=lambda kv: (kv[1][0]), reverse=False)
for method in ab.get_methods():
if method == sorted_dic[0][0]:
rst_m[method] = '-'
continue
s, p = scipy.stats.mannwhitneyu(data[sorted_dic[0][0]], data[method], alternative='two-sided')
if p < 0.05:
rst_m[method] = '+'
else:
rst_m[method] = '-'
res_sig[task_name] = rst_m
latex_code = matrix_to_latex({'mean': res_mean, 'std': res_std, 'significance': res_sig}, list(ab.get_task_names()),
list(ab.get_methods()),
caption='Convergence rate comparison among different algorithms.')
save_path = save_path / 'Overview'
os.makedirs(save_path, exist_ok=True)
tex_save_path = save_path / 'tex'
os.makedirs(tex_save_path, exist_ok=True)
table_path = save_path / 'Table'
os.makedirs(table_path, exist_ok=True)
with open(tex_save_path / "compare_convergence_rate.tex", 'w') as f:
f.write(latex_code)
try:
compile_tex(tex_save_path / "compare_convergence_rate.tex", table_path)
except Exception:
pass
print(f"LaTeX code has been saved to {tex_save_path}")
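The `acc_iter` helper above scores convergence as the fraction of the evaluation budget spent before the running best first reaches the anchor value (the 75th percentile of final best values across all runs). A standalone sketch of the same logic, using plain `min` instead of `np.min`:

```python
def acc_iter(Y, anchor_value):
    """Fraction of the budget used before the running best first reaches anchor_value."""
    for i in range(1, len(Y)):
        if min(Y[:i]) <= anchor_value:
            return i / len(Y)
    return 1  # the anchor was never reached within the budget

# Trace of 5 evaluations; the running best crosses the anchor at step 3:
print(acc_iter([0.9, 0.8, 0.2, 0.5, 0.1], anchor_value=0.3))  # 0.6
```

Lower values therefore indicate faster convergence, which is why the table sorts methods by ascending mean rate.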
================================================
FILE: transopt/ResultAnalysis/TableToLatex.py
================================================
import numpy as np
from typing import Union, Dict
def matrix_to_latex(Data: Dict, col_names, row_names, caption, oder="min"):
mean = Data["mean"]
std = Data["std"]
significance = Data["significance"]
num_cols = len(mean.keys())
num_rows = len(row_names)
if len(col_names) != num_cols or len(row_names) != num_rows:
raise ValueError(
"Mismatch between matrix dimensions and provided row/column names."
)
latex_code = []
# Document class and packages
latex_code.append("\\documentclass{article}")
latex_code.append("\\usepackage{geometry}")
latex_code.append("\\geometry{a4paper, margin=1in}")
latex_code.append("\\usepackage{graphicx}")
latex_code.append("\\usepackage{colortbl}")
latex_code.append("\\usepackage{booktabs}")
latex_code.append("\\usepackage{threeparttable}")
latex_code.append("\\usepackage{caption}")
latex_code.append("\\usepackage{xcolor}")
latex_code.append("\\pagestyle{empty}")
# Begin the document
latex_code.append("\\begin{document}")
latex_code.append("")
latex_code.append("\\begin{table*}[t!]")
latex_code.append(" \\scriptsize")
latex_code.append(" \\centering")
latex_code.append(f" \\caption{{{caption}}}")
latex_code.append(" \\resizebox{1.0\\textwidth}{!}{")
latex_code.append(" \\begin{tabular}{c|" + "".join(["c"] * (num_rows)) + "}")
latex_code.append(" \\hline")
# Adding column names
col_header = " & ".join([""] + row_names) + " \\\\"
latex_code.append(" " + col_header)
latex_code.append(" \\hline")
# Adding rows
for i in range(num_cols):
str_data = []
for j in range(num_rows):
str_format = ""
extreme = np.min(mean[col_names[i]]) if oder == "min" else np.max(mean[col_names[i]])
if mean[col_names[i]][j] == extreme:
str_format += "\\cellcolor[rgb]{ .682, .667, .667}\\textbf{"
str_format += "%.3E(%.3E)" % (
float(mean[col_names[i]][j]),
std[col_names[i]][j],
)
str_format += "}"
str_data.append(str_format)
elif significance[col_names[i]][row_names[j]] == "+":
str_data.append(
"%.3E(%.3E)$^\\dagger$"
% (float(mean[col_names[i]][j]), std[col_names[i]][j])
)
else:
str_data.append(
"%.3E(%.3E)"
% (float(mean[col_names[i]][j]), std[col_names[i]][j])
)
test_name = col_names[i].split("_")[0] + col_names[i].split("_")[1]
row_data = " & ".join(["\\texttt{" + f"{test_name}" + "}"] + str_data) + " \\\\"
latex_code.append(" " + row_data)
latex_code.append(" \\hline")
latex_code.append(" \\end{tabular}")
latex_code.append(" }")
latex_code.append(" \\begin{tablenotes}")
latex_code.append(" \\tiny")
latex_code.append(
" \\item The labels in the first column are the combination of the first letter of test problem and the number of variables, e.g., A4 is Ackley problem with $n=4$."
)
latex_code.append(
" \\item $^\\dagger$ indicates that the best algorithm is significantly better than the other one according to the Wilcoxon signed-rank test at a 5\\% significance level."
)
latex_code.append(" \\end{tablenotes}")
latex_code.append("\\end{table*}%")
latex_code.append("\\end{document}")
return "\n".join(latex_code)
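The per-cell formatting above can be isolated: `format_cell` below is a hypothetical helper mirroring the `%.3E(%.3E)` convention, where the best cell is shaded and bold while significantly-worse cells receive a dagger:

```python
def format_cell(mean, std, is_best, significant):
    """Format one mean(std) table cell in the style used by matrix_to_latex."""
    cell = "%.3E(%.3E)" % (mean, std)
    if is_best:
        # Best cell: gray background and bold text
        return r"\cellcolor[rgb]{ .682, .667, .667}\textbf{" + cell + "}"
    # Other cells: dagger marks a statistically significant difference
    return cell + r"$^\dagger$" if significant else cell

print(format_cell(1.234, 0.01, False, True))  # 1.234E+00(1.000E-02)$^\dagger$
```

Raw strings avoid Python's invalid-escape warnings for LaTeX commands such as `\cellcolor` and `\dagger`.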
================================================
FILE: transopt/ResultAnalysis/TrackOptimization.py
================================================
import numpy as np
from collections import Counter, defaultdict
from transopt.ResultAnalysis.AnalysisBase import AnalysisBase
track_registry = {}
# Decorator for registering functions
def track_register(name):
def decorator(func_or_class):
if name in track_registry:
raise ValueError(f"Error: '{name}' is already registered.")
track_registry[name] = func_or_class
return func_or_class
return decorator
================================================
FILE: transopt/ResultAnalysis/__init__.py
================================================
================================================
FILE: transopt/__init__.py
================================================
================================================
FILE: transopt/agent/__init__.py
================================================
================================================
FILE: transopt/agent/app.py
================================================
import json
import os
from multiprocessing import Process, Manager
from flask import Flask, jsonify, request
from flask_cors import CORS
from services import Services
from transopt.agent.registry import *
from transopt.utils.log import logger
def create_app():
app = Flask(__name__)
origins = os.getenv("CORS_ORIGINS", "*")
CORS(app, resources={r"/*": {"origins": origins}})
app.config['DEBUG'] = os.getenv('DEBUG', 'true').lower() in ('1', 'true', 'yes')
manager = Manager()
task_queue = manager.Queue()
result_queue = manager.Queue()
db_lock = manager.Lock()
services = Services(task_queue, result_queue, db_lock)
@app.route("/api/generate-yaml", methods=["POST"])
def generate_yaml():
# try:
data = request.json
user_input = data.get("content", {}).get("text", "")
response_content = services.chat(user_input)
return jsonify({"message": response_content}), 200
# except Exception as e:
# logger.error(f"Error in generating YAML: {e}")
# return jsonify({"error": str(e)}), 500
@app.route("/api/Dashboard/tasks", methods=["POST"])
def report_send_tasks_information():
all_info = services.get_experiment_datasets()
all_tasks_info = []
for task_name, task_info in all_info:
info = task_info['additional_config']
info['problem_name'] = task_name
all_tasks_info.append(info)
return jsonify(all_tasks_info), 200
@app.route("/api/Dashboard/charts", methods=["POST"])
def report_update_charts_data():
data = request.json
user_input = data.get("taskname", "")
charts = services.get_report_charts(user_input)
return jsonify(charts), 200
@app.route("/api/Dashboard/trajectory", methods=["POST"])
def report_update_trajectory_data():
data = request.json
user_input = data.get("taskname", "")
# trajectory; same data format as before: {"TrajectoryData": ...}
charts = services.get_report_traj(user_input)
return jsonify(charts), 200
@app.route("/api/configuration/select_task", methods=["POST"])
def configuration_receive_tasks():
tasks_info = request.json
# try:
services.receive_tasks(tasks_info)
# except Exception as e:
# logger.error(f"Error in searching dataset: {e}")
# return jsonify({"error": str(e)}), 500
return {"succeed": True}, 200
@app.route("/api/configuration/select_algorithm", methods=["POST"])
def configuration_receive_algorithm():
optimizer_info = request.json
print(optimizer_info)
# optimizer_info = {'SpaceRefiner': 'default',
# 'SpaceRefinerParameters': '',
# 'SpaceRefinerDataSelector': 'default',
# 'SpaceRefinerDataSelectorParameters': '',
# 'Sampler': 'default',
# 'SamplerParameters': '',
# 'SamplerInitNum': '11',
# 'SamplerDataSelector': 'default',
# 'SamplerDataSelectorParameters': '',
# 'Pretrain': 'default',
# 'PretrainParameters': '',
# 'PretrainDataSelector': 'default',
# 'PretrainDataSelectorParameters': '',
# 'Model': 'default',
# 'ModelParameters': '',
# 'ModelDataSelector': 'default',
# 'ModelDataSelectorParameters': '',
# 'ACF': 'default',
# 'ACFParameters': '',
# 'ACFDataSelector': 'default',
# 'ACFDataSelectorParameters': '',
# 'Normalizer': 'default',
# 'NormalizerParameters': '',
# 'NormalizerDataSelector': 'default',
# 'NormalizerDataSelectorParameters': ''}
try:
services.receive_optimizer(optimizer_info)
except Exception as e:
logger.error(f"Error in searching dataset: {e}")
return jsonify({"error": str(e)}), 500
return {"succeed": True}, 200
@app.route("/api/configuration/basic_information", methods=["POST"])
def configuration_basic_information():
data = request.json
user_input = data.get("paremeter", "")
task_data = services.get_modules()
# with open('transopt/agent/page_service_data/configuration_basic.json', 'r') as file:
# data = json.load(file)
print(services)
return jsonify(task_data), 200
@app.route("/api/configuration/dataset", methods=["POST"])
def configuration_dataset():
metadata_info = request.json
# print(metadata_info)
# metadata_info = {
# "object": "Space refiner",
# "datasets": ["dataset1", "dataset2"]
# }
if metadata_info['object'] == 'Narrow Search Space':
metadata_info['object'] = 'SpaceRefiner'
elif metadata_info['object'] == 'Initialization':
metadata_info['object'] = 'Sampler'
elif metadata_info['object'] == 'Pre-train':
metadata_info['object'] = 'Pretrain'
elif metadata_info['object'] == 'Surrogate Model':
metadata_info['object'] = 'Model'
elif metadata_info['object'] == 'Acquisition Function':
metadata_info['object'] = 'ACF'
try:
services.set_metadata(metadata_info)
except Exception as e:
logger.error(f"Error in searching dataset: {e}")
return jsonify({"error": str(e)}), 500
return {"succeed": True}, 200
@app.route("/api/Dashboard/errorsubmit", methods=["POST"])
def errorsubmit():
try:
return {"succeed": True}, 200
except Exception as e:
logger.error(f"Error in searching dataset: {e}")
return {"error": False}, 200
@app.route("/api/configuration/search_dataset", methods=["POST"])
def configuration_search_dataset():
try:
data = request.json
dataset_name = data["task_name"]
if data['search_method'] in ('Fuzzy', 'Hash'):
dataset_info = {}
elif data['search_method'] == 'LSH':
dataset_info = {
"num_variables": data["num_variables"],
"num_objectives": data["num_objectives"],
"variables": [
{"name": var_name} for var_name in data["variables_name"].split(",")
],
}
else:
pass
datasets = services.search_dataset(data['search_method'], dataset_name, dataset_info)
return jsonify(datasets), 200
except Exception as e:
logger.error(f"Error in searching dataset: {e}")
return jsonify({"error": str(e)}), 500
@app.route("/api/configuration/delete_dataset", methods=["POST"])
def configuration_delete_dataset():
metadata_info = request.json
datasets = metadata_info["datasets"]
services.remove_dataset(datasets)
return {"succeed": True}, 200
@app.route("/api/configuration/run", methods=["POST"])
def configuration_run():
run_info = request.json
if "Seeds" in run_info:
seeds = [int(seed) for seed in run_info['Seeds'].split(",")]
else:
seeds = [0]
services.run_optimize(seeds) # Handle process creation within run_optimize
return jsonify({"isSucceed": True}), 200
@app.route("/api/configuration/run_progress", methods=["POST"])
def configuration_run_progress():
message = request.json
# Get the progress of the currently running tasks
data = []
process_info = services.get_all_process_info()
for proc_id, proc in process_info.items():
if proc['status'] == 'running':
data.append({
"name": f"{proc['task']}_pid_{proc_id}",
"progress": str(proc['progress']),
})
return jsonify(data), 200
@app.route("/api/configuration/stop_progress", methods=["POST"])
def configuration_stop_progress():
message = request.json
task_name = message['name']
print(task_name)
pid = int(task_name.split('_')[-1])
services.terminate_task(pid)
return {"succeed": True}, 200
@app.route("/api/RunPage/get_info", methods=["POST"])
def run_page_get_info():
data = request.json
user_input = data.get("action", "")
task_data = services.get_configuration()
# with open('transopt/agent/page_service_data/configuration_info.json', 'r') as file:
# data = json.load(file)
return jsonify(task_data), 200
@app.route("/api/comparison/selections", methods=["POST"])
def comparison_send_selections():
info = request.json
# Requested when the Comparison page initializes, to fetch the selectable search options
data = services.get_comparision_modules()
return jsonify(data), 200
@app.route("/api/comparison/choose_task", methods=["POST"])
def comparison_choose_tasks():
conditions = request.json
ret = []
charts_data = {}
for condition in conditions:
ret.append(services.comparision_search(condition))
charts_data['BoxData'] = services.get_box_plot_data(ret)
charts_data['TrajectoryData'] = services.construct_statistic_trajectory_data(ret)
return jsonify(charts_data), 200
return app
def main():
app = create_app()
app.run(debug=app.config['DEBUG'], port=5001)
if __name__ == "__main__":
main()
================================================
FILE: transopt/agent/chat/openai_chat.py
================================================
import json
import subprocess
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Union
import yaml
from openai.types.chat.chat_completion import ChatCompletion
from pydantic import BaseModel
from transopt.agent.config import RunningConfig
from transopt.agent.registry import *
from transopt.benchmark.instantiate_problems import InstantiateProblems
from transopt.datamanager.manager import DataManager
from transopt.optimizer.construct_optimizer import ConstructOptimizer
from transopt.utils.log import logger
def dict_to_string(dictionary):
return json.dumps(dictionary, ensure_ascii=False, indent=4)
class Message(BaseModel):
"""Model for LLM messages"""
role: str # The role of the message author (system, user, assistant, or function).
content: Optional[Union[str, List[Dict]]] = None # The message content.
tool_call_id: Optional[str] = None # ID for the tool call response
name: Optional[str] = None # Name of the tool or function, if applicable
metrics: Dict[str, Any] = {} # Metrics for the message.
def get_content_string(self) -> str:
"""Returns the content as a string."""
if isinstance(self.content, str):
return self.content
if isinstance(self.content, list):
return json.dumps(self.content)
return ""
def to_dict(self) -> Dict[str, Any]:
_dict = self.model_dump(exclude_none=True, exclude={"metrics"})
# Manually add the content field if it is None
if self.content is None:
_dict["content"] = None
return _dict
def log(self, level: Optional[str] = None):
"""Log the message to the console."""
_logger = getattr(logger, level or "debug")
_logger(f"============== {self.role} ==============")
message_detail = f"Content: {self.get_content_string()}"
if self.tool_call_id:
message_detail += f", Tool Call ID: {self.tool_call_id}"
if self.name:
message_detail += f", Name: {self.name}"
_logger(message_detail)
class OpenAIChat:
history: List[Message]
def __init__(
self,
api_key,
model="gpt-3.5-turbo",
base_url="https://api.openai.com/v1",
client_kwargs: Optional[Dict[str, Any]] = None,
data_manager: Optional[DataManager] = None,
):
self.base_url = base_url
self.model = model
self.api_key = api_key
self.client_kwargs = client_kwargs or {}
self.prompt = self._get_prompt()
self.is_first_msg = True
self.history = []
self.data_manager = DataManager() if data_manager is None else data_manager
self.running_config = RunningConfig()
def _get_prompt(self):
"""Reads a prompt from a file."""
current_dir = Path(__file__).parent
file_path = current_dir / "prompt"
with open(file_path, "r") as file:
return file.read()
@property
def client(self):
"""Lazy initialization of the OpenAI client."""
from openai import OpenAI
return OpenAI(
api_key=self.api_key, base_url=self.base_url,
**self.client_kwargs
)
def invoke_model(self, messages: List[Dict]) -> ChatCompletion:
self.history.extend(messages)
tools = [
{
"type": "function",
"function": {
"name": "get_all_datasets",
"description": "Show all available datasets in our system",
"parameters": {},
},
},
{
"type": "function",
"function": {
"name": "get_dataset_info",
"description": "Show detailed information of dataset according to the dataset name",
"parameters": {
"type": "object",
"properties": {
"dataset_name": {
"type": "string",
"description": "The name of the dataset",
},
},
"required": ["dataset_name"],
},
},
},
{
"type": "function",
"function": {
"name": "get_all_problems",
"description": "Show all optimization problems that our system supports",
"parameters": {},
},
},
{
"type": "function",
"function": {
"name": "get_optimization_techniques",
"description": "Show all optimization techniques supported in our system",
"parameters": {},
},
},
{
"type": "function",
"function": {
"name": "set_optimization_problem",
"description": "Define or set an optimization problem based on user inputs for 'problem name', 'workload' and 'budget'.",
"parameters": {
"type": "object",
"properties": {
"problem_name": {
"type": "string",
"description": "The name of the optimization problem",
},
"workload": {
"type": "integer",
"description": "The number of workload",
},
"budget": {
"type": "integer",
"description": "The number of budget to do function evaluations",
},
},
"required": ["problem_name", "workload", "budget"],
},
},
},
{
"type": "function",
"function": {
"name": "set_model",
"description": "Set the model used as the surrogate model in Bayesian optimization. The input model name should be one of the available models.",
"parameters": {
"type": "object",
"properties": {
"Model": {
"type": "string",
"description": "The model name",
},
},
"required": ["Model"],
},
},
},
{
"type": "function",
"function": {
"name": "set_sampler",
"description": "Set the sampler for the optimization process. The input sampler name should be one of the available samplers.",
"parameters": {
"type": "object",
"properties": {
"Sampler": {
"type": "string",
"description": "The name of Sampler",
},
},
"required": ["Sampler"],
},
},
},
{
"type": "function",
"function": {
"name": "set_pretrain",
"description": "Set the pretrain method. The user input should include one of the available pretrain methods.",
"parameters": {
"type": "object",
"properties": {
"Pretrain": {
"type": "string",
"description": "The name of Pretrain method",
},
},
"required": ["Pretrain"],
},
},
},
{
"type": "function",
"function": {
"name": "set_normalizer",
"description": "Set the normalization method to normalize function evaluations and parameters. It requires one of the available normalization methods as input.",
"parameters": {
"type": "object",
"properties": {
"Normalizer": {
"type": "string",
"description": "The name of Normalization method",
},
},
"required": ["Normalizer"],
},
},
},
{
"type": "function",
"function": {
"name": "set_metadata",
"description": "Set the metadata using a dataset stored in our system and specify a module to utilize this metadata.",
"parameters": {
"type": "object",
"properties": {
"module_name": {
"type": "string",
"description": "The name of the module that will use the metadata",
},
"dataset_name": {
"type": "string",
"description": "The name of the dataset to use as metadata",
},
},
"required": ["module_name", "dataset_name"],
},
},
},
{
"type": "function",
"function": {
"name": "run_optimization",
"description": "Run the optimization process using the configurations set so far.",
"parameters": {},
},
},
{
"type": "function",
"function": {
"name": "show_configuration",
"description": "Display all configurations set by the user so far, including the optimizer configuration, metadata configuration, and optimization problems",
"parameters": {},
},
},
{
"type": "function",
"function": {
"name": "install_package",
"description": "Install a Python package using pip",
"parameters": {
"type": "object",
"properties": {
"package_name": {
"type": "string",
"description": "The name of the package to install",
},
},
"required": ["package_name"],
},
},
},
]
response = self.client.chat.completions.create(
model=self.model,
messages=messages,
tools=tools,
tool_choice="auto",
temperature=0.1,
)
response_message = response.choices[0].message
tool_calls = response_message.tool_calls
# Process tool calls if there are any
if tool_calls:
self.history.append(response_message)
for tool_call in tool_calls:
function_name = tool_call.function.name
function_args = json.loads(tool_call.function.arguments)
function_response = self.call_manager_function(function_name, **function_args)
tool_message = {
"role": "tool",
"tool_call_id": tool_call.id,
"name": function_name,
"content": function_response
}
self.history.append(tool_message)
# Refresh the model with the function response and get a new response
response = self.client.chat.completions.create(
model=self.model,
messages=self.history,
)
self.history.append(response.choices[0].message)
logger.debug(f"Response: {response.choices[0].message.content}")
return response
def get_response(self, user_input) -> str:
logger.debug("---------- OpenAI Response Start ----------")
user_message = {"role": "user", "content": user_input}
logger.debug(f"User: {user_input}")
messages = [user_message]
if self.is_first_msg:
system_message = {"role": "system", "content": self.prompt}
messages.insert(0, system_message)
self.is_first_msg = False
else:
system_message = {"role": "system", "content": "Don't tell me which function to use, just call it. Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous"}
messages.insert(0, system_message)
response = self.invoke_model(messages)
logger.debug(f"Assistant: {response.choices[0].message.content}")
logger.debug("---------- OpenAI Response End ----------")
return response.choices[0].message.content
def call_manager_function(self, function_name, **kwargs):
available_functions = {
"get_all_datasets": self.data_manager.get_all_datasets,
"get_all_problems": self.get_all_problems,
"get_optimization_techniques": self.get_optimization_techniques,
"get_dataset_info": lambda: self.data_manager.get_dataset_info(kwargs['dataset_name']),
"set_optimization_problem": lambda: self.set_optimization_problem(kwargs['problem_name'], kwargs['workload'], kwargs['budget']),
'set_space_refiner': lambda: self.set_space_refiner(kwargs['refiner']),
'set_sampler': lambda: self.set_sampler(kwargs['Sampler']),
'set_pretrain': lambda: self.set_pretrain(kwargs['Pretrain']),
'set_model': lambda: self.set_model(kwargs['Model']),
'set_normalizer': lambda: self.set_normalizer(kwargs['Normalizer']),
'set_metadata': lambda: self.set_metadata(kwargs['module_name'], kwargs['dataset_name']),
'run_optimization': self.run_optimization,
'show_configuration': self.show_configuration,
"install_package": lambda: self.install_package(kwargs['package_name']),
}
function_to_call = available_functions[function_name]
return json.dumps({"result": function_to_call()})
def _initialize_modules(self):
import transopt.benchmark.synthetic
# import transopt.benchmark.CPD
import transopt.optimizer.acquisition_function
import transopt.optimizer.model
import transopt.optimizer.pretrain
import transopt.optimizer.refiner
import transopt.optimizer.sampler
def get_all_problems(self):
tasks_info = []
# tasks information
task_names = problem_registry.list_names()
for name in task_names:
if problem_registry[name].problem_type == "synthetic":
num_obj = problem_registry[name].num_objectives
num_var = problem_registry[name].num_variables
task_info = {
"name": name,
"problem_type": "synthetic",
"anyDim": "True",
'num_vars': [],
"num_objs": [1],
"workloads": [],
"fidelity": [],
}
else:
num_obj = problem_registry[name].num_objectives
num_var = problem_registry[name].num_variables
fidelity = problem_registry[name].fidelity
workloads = problem_registry[name].workloads
task_info = {
"name": name,
"problem_type": "synthetic",
"anyDim": False,
"num_vars": [num_var],
"num_objs": [num_obj],
"workloads": [workloads],
"fidelity": [fidelity],
}
tasks_info.append(task_info)
return tasks_info
def get_optimization_techniques(self):
basic_info = {}
selector_info = []
model_info = []
sampler_info = []
acf_info = []
pretrain_info = []
refiner_info = []
normalizer_info = []
# available optimization technique information
sampler_names = sampler_registry.list_names()
for name in sampler_names:
sampler_info.append(name)
basic_info["Sampler"] = ','.join(sampler_info)
refiner_names = space_refiner_registry.list_names()
for name in refiner_names:
refiner_info.append(name)
basic_info["SpaceRefiner"] = ','.join(refiner_info)
pretrain_names = pretrain_registry.list_names()
for name in pretrain_names:
pretrain_info.append(name)
basic_info["Pretrain"] = ','.join(pretrain_info)
model_names = model_registry.list_names()
for name in model_names:
model_info.append(name)
basic_info["Model"] = ','.join(model_info)
acf_names = acf_registry.list_names()
for name in acf_names:
acf_info.append(name)
basic_info["ACF"] = ','.join(acf_info)
selector_names = selector_registry.list_names()
for name in selector_names:
selector_info.append(name)
basic_info["DataSelector"] = ','.join(selector_info)
normalizer_names = normalizer_registry.list_names()
for name in normalizer_names:
normalizer_info.append(name)
basic_info["Normalizer"] = ','.join(normalizer_info)
return basic_info
def set_optimization_problem(self, problem_name, workload, budget):
problem_info = {}
if problem_name in problem_registry:
problem_info[problem_name] = {
'budget': budget,
'workload': workload,
'budget_type': 'Num_FEs',
"params": {},
}
self.running_config.set_tasks(problem_info)
return "Succeed"
def set_space_refiner(self, refiner):
self.running_config.optimizer['SpaceRefiner'] = refiner
return f"Succeed to set the space refiner {refiner}"
def set_sampler(self, Sampler):
self.running_config.optimizer['Sampler'] = Sampler
return f"Succeed to set the sampler {Sampler}"
def set_pretrain(self, Pretrain):
self.running_config.optimizer['Pretrain'] = Pretrain
return f"Succeed to set the pretrain {Pretrain}"
def set_model(self, Model):
self.running_config.optimizer['Model'] = Model
return f"Succeed to set the model {Model}"
def set_normalizer(self, Normalizer):
self.running_config.optimizer['Normalizer'] = Normalizer
return f"Succeed to set the normalizer {Normalizer}"
def set_metadata(self, module_name, dataset_name):
self.running_config.metadata[module_name] = dataset_name
return f"Succeed to set the metadata {dataset_name} for {module_name}"
def run_optimization(self):
task_set = InstantiateProblems(self.running_config.tasks, 0)
optimizer = ConstructOptimizer(self.running_config.optimizer, 0)
try:
while (task_set.get_unsolved_num()):
iteration = 0
search_space = task_set.get_cur_searchspace()
dataset_info, dataset_name = self.construct_dataset_info(task_set, self.running_config, seed=0)
self.data_manager.db.create_table(dataset_name, dataset_info, overwrite=True)
optimizer.link_task(task_name=task_set.get_curname(), search_space=search_space)
metadata, metadata_info = self.get_metadata('SpaceRefiner')
optimizer.search_space_refine(metadata, metadata_info)
metadata, metadata_info = self.get_metadata('Sampler')
samples = optimizer.sample_initial_set(metadata, metadata_info)
parameters = [search_space.map_to_design_space(sample) for sample in samples]
observations = task_set.f(parameters)
self.save_data(dataset_name, parameters, observations, iteration)
optimizer.observe(samples, observations)
# Meta-fit the model on metadata from historical tasks
metadata, metadata_info = self.get_metadata('Model')
optimizer.meta_fit(metadata, metadata_info)
while (task_set.get_rest_budget()):
optimizer.fit()
suggested_samples = optimizer.suggest()
parameters = [search_space.map_to_design_space(sample) for sample in suggested_samples]
observations = task_set.f(parameters)
self.save_data(dataset_name, parameters, observations, iteration)
optimizer.observe(suggested_samples, observations)
iteration += 1
print("Seed: ", 0, "Task: ", task_set.get_curname(), "Iteration: ", iteration)
# if self.verbose:
# self.visualization(testsuits, suggested_sample)
task_set.roll()
except Exception as e:
logger.error(f"Optimization failed: {e}")
raise
def show_configuration(self):
conf = {'Optimization problem': self.running_config.tasks, 'Optimizer': self.running_config.optimizer, 'Metadata': self.running_config.metadata}
return dict_to_string(conf)
def install_package(self, package_name: str) -> str:
"""Install a Python package using pip."""
try:
subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])
return f"Package '{package_name}' installed successfully."
except subprocess.CalledProcessError as e:
logger.error(f"Failed to install package '{package_name}': {e}")
return f"Failed to install package '{package_name}'. Error: {str(e)}"
================================================
FILE: transopt/agent/chat/prompt
================================================
You are an agent of the "Transfer Optimization System," designed to solve optimization problems. The system can solve optimization problems with transfer learning for optimization techniques.
Your primary roles are:
1.Display system information, including datasets stored within the system, available optimization modules and methods, and the optimization problems that the system can address.
2.Assist users in configuring and launching optimization problems using suitable methods.
Please inform users that in order to utilize our system, they are required to complete a four-step configuration process:
1.Define the optimization problem, ensuring to specify both the workload and the budget.
2.Configure the optimization method, which includes the following five modules: search space refiner, sampler, pre-train method, model, acquisition function, and normalizer. Use default methods for any modules not explicitly configured. Each module can be individually set with a specific method.
3.Choose the metadata by selecting one or more datasets already available in the system.
4.Run the optimization process."
================================================
FILE: transopt/agent/chat/prompt.bak
================================================
Please transform my optimization group description into a JSON format according to the following template. Ensure the description adheres to the structure outlined below, including all necessary and optional fields:
{
"group_id": "Specify group ID, generated automatically if unspecified, starting from 1",
"group_type": "Specify 'Sequential' or 'Parallel'",
"tasks": [
{
"task_name": "Choose from 'HPOXGBoost', 'HPOSVM', 'HPORes18'",
"variables": {
"variable_name": {"type": "Specify type, e.g., 'categorical', 'integer', 'continuous'", "range or choices accroding to type": [Specify range or choices]}
},
"objectives": {
"objective_name": {"type": "Specify 'minimize' or 'maximize'"}
},
"fidelities": {
"fidelity_name": {"type": "Specify type, e.g., 'categorical', 'integer', 'continuous'", "range or choices according to type": [Specify range or choices], "default": "Specify default value"}
},
"workloads": "Mandatory, specify the name of the workloads",
"budget": "Mandatory, specify the budget"
}
],
"algorithm": {
"name": "Specify algorithm name, default if unspecified is 'BO'",
"parameters": {
"parameter_name": "Specify parameter value, e.g., 'max_iter': 100"
}
},
"auxiliary": {
"selection_criteria": "Optional, specify criteria",
"using_stage": "Optional, specify stage",
}
}
The JSON above specifies not only the format but also the name of each field and the requirements for its value. These requirement descriptions themselves must not be copied verbatim into the generated content.
Requirements:
Group ID: Automatically generated, starting from 1.
Group Type: Mandatory. Indicate whether tasks are to be executed in a 'Sequential' or 'Parallel' manner.
Tasks: Mandatory. List each task, including mandatory fields like Task Name, Workloads, and Budget, and optional fields like Variables, Objectives, Fidelities. Optional fields' values should be {} if unspecified.
Algorithm: Optional. Specify the algorithm name and any parameters. If unspecified, 'BO' will be used as the default algorithm.
Auxiliary: Optional. Include any additional information if necessary.
Output:
The output should have only two possibilities:
1. If the description contains all mandatory fields and the details are consistent, generate the JSON structure according to the template above.
2. If the description omits any mandatory fields or the provided details contain inconsistencies, give an error message indicating the missing or incorrect information and do not generate the JSON structure. For example, if the budget is not specified, respond with an error message such as "Budget is missing, ...." and do not generate the JSON structure.
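The two-branch output contract above can be checked mechanically. A hedged sketch of such a validator; the field names follow the template, but the helper itself is not part of TransOPT:

```python
def check_group_description(group):
    """Return a list of error messages for missing mandatory fields."""
    errors = []
    if "group_type" not in group:
        errors.append("Group type is missing.")
    tasks = group.get("tasks") or []
    if not tasks:
        errors.append("At least one task is required.")
    for i, task in enumerate(tasks):
        # task_name, workloads, and budget are mandatory per the template.
        for field in ("task_name", "workloads", "budget"):
            if field not in task:
                errors.append(f"Task {i}: {field} is missing.")
    return errors

ok = {"group_type": "Sequential",
      "tasks": [{"task_name": "HPOSVM", "workloads": "w1", "budget": 100}]}
bad = {"tasks": [{"task_name": "HPOSVM"}]}
print(check_group_description(ok))   # []
print(check_group_description(bad))
```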
================================================
FILE: transopt/agent/chat/yaml_generator.py
================================================
from pathlib import Path
from typing import Any, Dict
import yaml
from transopt.utils.log import logger
from transopt.agent.chat.openai_chat import Message, OpenAIChat
def get_prompt(file_name: str) -> str:
"""Reads a prompt from a file."""
current_dir = Path(__file__).parent
file_path = current_dir / file_name
with open(file_path, 'r') as file:
prompt = file.read()
return prompt
def parse_response(response: str) -> Dict[str, Any]:
"""Parses a string response into a structured Python dictionary."""
try:
structured_info = yaml.safe_load(response)
except yaml.YAMLError as e:
logger.error(f"Error parsing response into Python dict: {e}")
structured_info = {}
return structured_info
def main():
# Assuming OpenAIChat and Message are defined elsewhere and imported correctly
openai_chat = OpenAIChat()
print("Welcome to the YAML Generator!")
user_input = input("\nPlease describe the configuration you'd like to convert to YAML:\n")
# Process the input using the OpenAI API
prompt = get_prompt("prompt") # Assuming the prompt file is named 'prompt.yml'
system_message = Message(role="system", content=prompt)
user_message = Message(role="user", content=user_input)
response_content = openai_chat.get_response([system_message, user_message])
print("\nAssistant's Response:\n")
print(response_content)
while True:
refine = input("\nPlease refine your configuration or type 'exit' to quit:\n")
if refine.lower() == 'exit':
print("Thank you for using the YAML Generator!")
break
user_message = Message(role="user", content=refine)
response_content = openai_chat.get_response([user_message])
print("\nAssistant's Response:\n")
print(response_content)
if __name__ == "__main__":
main()
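parse_response above wraps yaml.safe_load and falls back to an empty dict on malformed input. The same defensive pattern, sketched with the stdlib json module so it runs without PyYAML (illustrative, not the TransOPT API):

```python
import json

def parse_structured_response(response):
    """Parse a JSON string into a dict, falling back to {} on errors."""
    try:
        parsed = json.loads(response)
    except json.JSONDecodeError:
        return {}
    # Normalize non-mapping results (numbers, lists, null) to an empty dict.
    return parsed if isinstance(parsed, dict) else {}

print(parse_structured_response('{"task": "HPOSVM", "budget": 100}'))
print(parse_structured_response("not valid json"))  # {}
print(parse_structured_response("[1, 2, 3]"))       # {}
```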
================================================
FILE: transopt/agent/config.py
================================================
import os
class Config:
DEBUG = True
# Read the API key from the environment instead of hard-coding a secret.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
OPENAI_URL = "https://aihubmix.com/v1"
class RunningConfig:
_instance = None
def __new__(cls, *args, **kwargs):
if cls._instance is None:
cls._instance = super(RunningConfig, cls).__new__(cls)
cls._instance._initialized = False
return cls._instance
def __init__(self):
# Run the initialization code only once for the shared instance.
if self._initialized:
return
self._initialized = True
self.tasks = None
self.optimizer = {'SpaceRefiner': None, 'Sampler': None, 'ACF': None, 'Pretrain': None, 'Model': None, 'Normalizer': None}
self.metadata = {'SpaceRefiner': [], 'Sampler': [], 'ACF': [], 'Pretrain': [], 'Model': [], 'Normalizer': []}
def set_tasks(self, tasks):
self.tasks = tasks
def set_optimizer(self, optimizer):
self.optimizer = optimizer
if 'SamplerInitNum' not in self.optimizer:
self.optimizer['SamplerInitNum'] = 11
self.optimizer['SamplerInitNum'] = int(self.optimizer['SamplerInitNum'])
def set_metadata(self, metadata):
self.metadata[metadata['object']] = metadata['datasets']
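RunningConfig overrides __new__ so every construction returns the same shared instance. A standalone sketch of the pattern, with an explicit guard so repeated construction does not reset shared state; the class name here is illustrative:

```python
class SharedConfig:
    """Minimal sketch of the __new__-based singleton used by RunningConfig."""
    _instance = None

    def __new__(cls, *args, **kwargs):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance._initialized = False
        return cls._instance

    def __init__(self):
        # Without this guard, __init__ runs on every SharedConfig() call
        # and silently resets the shared state.
        if self._initialized:
            return
        self._initialized = True
        self.tasks = None

a = SharedConfig()
a.tasks = {"Sphere": {"budget": 11}}
b = SharedConfig()
print(a is b)   # True
print(b.tasks)  # {'Sphere': {'budget': 11}}
```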
================================================
FILE: transopt/agent/registry.py
================================================
class Registry:
def __init__(self):
self._registry = {}
def register(self, name=None, cls=None, **kwargs):
if cls is None:
def wrapper(cls):
return self.register(name, cls, **kwargs)
return wrapper
if name is None:
name = cls.__name__
if name in self._registry:
raise ValueError(f"Error: '{name}' is already registered.")
self._registry[name] = {'cls': cls, **kwargs}
return cls
def get(self, name):
return self._registry[name]['cls']
def list_names(self):
return list(self._registry.keys())
def __getitem__(self, item):
return self.get(item)
def __contains__(self, item):
return item in self._registry
space_refiner_registry = Registry()
sampler_registry = Registry()
pretrain_registry = Registry()
model_registry = Registry()
acf_registry = Registry()
problem_registry = Registry()
statistic_registry = Registry()
selector_registry = Registry()
normalizer_registry = Registry()
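The Registry above is used as a class decorator, with or without an explicit name. A standalone sketch; the class is repeated here in condensed form so the example runs on its own, and the registered class names are illustrative:

```python
class Registry:
    """Condensed copy of the Registry above."""
    def __init__(self):
        self._registry = {}

    def register(self, name=None, cls=None, **kwargs):
        if cls is None:
            # Called as @registry.register(...) — return a decorator.
            def wrapper(cls):
                return self.register(name, cls, **kwargs)
            return wrapper
        if name is None:
            name = cls.__name__
        if name in self._registry:
            raise ValueError(f"Error: '{name}' is already registered.")
        self._registry[name] = {'cls': cls, **kwargs}
        return cls

    def get(self, name):
        return self._registry[name]['cls']

    def __contains__(self, item):
        return item in self._registry

demo_registry = Registry()

@demo_registry.register("MySampler")  # explicit name
class MySamplerImpl:
    pass

@demo_registry.register()  # falls back to the class name
class LatinHypercube:
    pass

print("MySampler" in demo_registry)  # True
print(demo_registry.get("LatinHypercube"))
```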
================================================
FILE: transopt/agent/run_cli.py
================================================
import os
import traceback
import argparse
from services import Services
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"
os.environ["OMP_NUM_THREADS"] = "1"
def set_task(services, args):
task_info = [{
"name": args.task_name,
"num_vars": args.num_vars,
"num_objs": args.num_objs,
"fidelity": args.fidelity,
"workloads": args.workloads,
"budget_type": args.budget_type,
"budget": args.budget,
}]
services.receive_tasks(task_info)
def set_optimizer(services, args):
optimizer_info = {
"SpaceRefiner": args.space_refiner,
"SpaceRefinerParameters": args.space_refiner_parameters,
"SpaceRefinerDataSelector": args.space_refiner_data_selector,
"SpaceRefinerDataSelectorParameters": args.space_refiner_data_selector_parameters,
"Sampler": args.sampler,
"SamplerInitNum": args.sampler_init_num,
"SamplerParameters": args.sampler_parameters,
"SamplerDataSelector": args.sampler_data_selector,
"SamplerDataSelectorParameters": args.sampler_data_selector_parameters,
"Pretrain": args.pre_train,
"PretrainParameters": args.pre_train_parameters,
"PretrainDataSelector": args.pre_train_data_selector,
"PretrainDataSelectorParameters": args.pre_train_data_selector_parameters,
"Model": args.model,
"ModelParameters": args.model_parameters,
"ModelDataSelector": args.model_data_selector,
"ModelDataSelectorParameters": args.model_data_selector_parameters,
"ACF": args.acquisition_function,
"ACFParameters": args.acquisition_function_parameters,
"ACFDataSelector": args.acquisition_function_data_selector,
"ACFDataSelectorParameters": args.acquisition_function_data_selector_parameters,
"Normalizer": args.normalizer,
"NormalizerParameters": args.normalizer_parameters,
"NormalizerDataSelector": args.normalizer_data_selector,
"NormalizerDataSelectorParameters": args.normalizer_data_selector_parameters,
}
services.receive_optimizer(optimizer_info)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
# Task
parser.add_argument("-n", "--task_name", type=str, default="MixupOOD")
parser.add_argument("-v", "--num_vars", type=int, default=2)
parser.add_argument("-o", "--num_objs", type=int, default=1)
parser.add_argument("-f", "--fidelity", type=str, default="")
parser.add_argument("-w", "--workloads", type=str, default="0")
parser.add_argument("-bt", "--budget_type", type=str, default="Num_FEs")
parser.add_argument("-b", "--budget", type=int, default=100)
# Optimizer
parser.add_argument("-sr", "--space_refiner", type=str, default="None")
parser.add_argument("-srp", "--space_refiner_parameters", type=str, default="")
parser.add_argument("-srd", "--space_refiner_data_selector", type=str, default="None")
parser.add_argument("-srdp", "--space_refiner_data_selector_parameters", type=str, default="")
parser.add_argument("-sp", "--sampler", type=str, default="random")
parser.add_argument("-spi", "--sampler_init_num", type=int, default=22)
parser.add_argument("-spp", "--sampler_parameters", type=str, default="")
parser.add_argument("-spd", "--sampler_data_selector", type=str, default="None")
parser.add_argument("-spdp", "--sampler_data_selector_parameters", type=str, default="")
parser.add_argument("-pt", "--pre_train", type=str, default="None")
parser.add_argument("-ptp", "--pre_train_parameters", type=str, default="")
parser.add_argument("-ptd", "--pre_train_data_selector", type=str, default="None")
parser.add_argument("-ptdp", "--pre_train_data_selector_parameters", type=str, default="")
parser.add_argument("-m", "--model", type=str, default="GP")
parser.add_argument("-mp", "--model_parameters", type=str, default="")
parser.add_argument("-md", "--model_data_selector", type=str, default="None")
parser.add_argument("-mdp", "--model_data_selector_parameters", type=str, default="")
parser.add_argument("-acf", "--acquisition_function", type=str, default="EI")
parser.add_argument("-acfp", "--acquisition_function_parameters", type=str, default="")
parser.add_argument("-acfd", "--acquisition_function_data_selector", type=str, default="None")
parser.add_argument("-acfdp", "--acquisition_function_data_selector_parameters", type=str, default="")
parser.add_argument("-norm", "--normalizer", type=str, default="Standard")
parser.add_argument("-normp", "--normalizer_parameters", type=str, default="")
parser.add_argument("-normd", "--normalizer_data_selector", type=str, default="None")
parser.add_argument("-normdp", "--normalizer_data_selector_parameters", type=str, default="")
# Seed
parser.add_argument("-s", "--seeds", type=int, default=0)
# parser.add_argument("-s", "--seeds", type=str, default="5")
args = parser.parse_args()
services = Services(None, None, None)
services._initialize_modules()
set_task(services, args)
set_optimizer(services, args)
try:
services._run_optimize_process(seed = args.seeds)
except Exception as e:
traceback.print_exc()
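The CLI above wires argparse flags into the services layer. A condensed, standalone sketch of the same parsing step, covering only a few of the flags (defaults mirror those above); parsing an explicit argv list keeps it testable without a shell:

```python
import argparse

def build_parser():
    """Condensed version of the run_cli.py argument parser."""
    parser = argparse.ArgumentParser(description="Condensed TransOPT CLI sketch")
    parser.add_argument("-n", "--task_name", type=str, default="MixupOOD")
    parser.add_argument("-b", "--budget", type=int, default=100)
    parser.add_argument("-m", "--model", type=str, default="GP")
    parser.add_argument("-s", "--seeds", type=int, default=0)
    return parser

args = build_parser().parse_args(["-n", "Sphere", "-b", "50"])
print(args.task_name, args.budget, args.model)  # Sphere 50 GP
```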
================================================
FILE: transopt/agent/services.py
================================================
import os
import signal
import time
from multiprocessing import Manager, Process
import numpy as np
from transopt.agent.chat.openai_chat import OpenAIChat
from transopt.agent.config import Config, RunningConfig
from transopt.agent.registry import *
from transopt.analysis.parameter_network import plot_network
from transopt.benchmark.instantiate_problems import InstantiateProblems
from transopt.datamanager.manager import Database, DataManager
from transopt.optimizer.construct_optimizer import (ConstructOptimizer,
ConstructSelector)
from transopt.utils.log import logger
from transopt.analysis.mds import FootPrint
class Services:
def __init__(self, task_queue, result_queue, lock):
self.config = Config()
self.running_config = RunningConfig()
# DataManager for general tasks, not specific optimization tasks
self.data_manager = DataManager()
self.tasks_info = []
self.openai_chat = OpenAIChat(
api_key=self.config.OPENAI_API_KEY,
model="gpt-3.5-turbo",
base_url=self.config.OPENAI_URL,
data_manager= self.data_manager
)
self._initialize_modules()
self.process_info = Manager().dict()
self.lock = Manager().Lock()
def chat(self, user_input):
response_content = self.openai_chat.get_response(user_input)
return response_content
def _initialize_modules(self):
# import transopt.benchmark.CPD
import transopt.benchmark.HPOOOD
import transopt.benchmark.HPO
import transopt.benchmark.synthetic
import transopt.benchmark.CSSTuning
import transopt.optimizer.acquisition_function
import transopt.optimizer.model
import transopt.optimizer.normalizer
import transopt.optimizer.pretrain
import transopt.optimizer.refiner
import transopt.optimizer.sampler
import transopt.optimizer.selector
def get_modules(self):
basic_info = {}
tasks_info = []
selector_info = []
model_info = []
sampler_info = []
acf_info = []
pretrain_info = [{'name':'None'}]
refiner_info = [{'name':'None'}]
normalizer_info = [{'name':'None'}]
# tasks information
task_names = problem_registry.list_names()
for name in task_names:
if problem_registry[name].problem_type == "synthetic":
num_obj = problem_registry[name].num_objectives
num_var = problem_registry[name].num_variables
task_info = {
"name": name,
"problem_type": "synthetic",
"anyDim": "True",
'num_vars': [],
"num_objs": [1],
"workloads": [],
"fidelity": [],
}
else:
num_obj = problem_registry[name].num_objectives
num_var = problem_registry[name].num_variables
fidelity = problem_registry[name].fidelity
workloads = problem_registry[name].workloads
problem_type = problem_registry[name].problem_type
task_info = {
"name": name,
"problem_type": problem_type,
"anyDim": False,
"num_vars": [num_var],
"num_objs": [num_obj],
"workloads": workloads,
"fidelity": [fidelity],
}
tasks_info.append(task_info)
basic_info["TasksData"] = tasks_info
sampler_names = sampler_registry.list_names()
for name in sampler_names:
sampler_info.append({"name": name})
basic_info["Sampler"] = sampler_info
refiner_names = space_refiner_registry.list_names()
for name in refiner_names:
refiner_info.append({"name": name})
basic_info["SpaceRefiner"] = refiner_info
pretrain_names = pretrain_registry.list_names()
for name in pretrain_names:
pretrain_info.append({"name": name})
basic_info["Pretrain"] = pretrain_info
model_names = model_registry.list_names()
for name in model_names:
model_info.append({"name": name})
basic_info["Model"] = model_info
acf_names = acf_registry.list_names()
for name in acf_names:
acf_info.append({"name": name})
basic_info["ACF"] = acf_info
selector_names = selector_registry.list_names()
for name in selector_names:
selector_info.append({"name": name})
basic_info["DataSelector"] = selector_info
normalizer_names = normalizer_registry.list_names()
for name in normalizer_names:
normalizer_info.append({"name": name})
basic_info["Normalizer"] = normalizer_info
return basic_info
def get_comparision_modules(self):
module_info = {}
model_info = []
sampler_info = []
acf_info = []
pretrain_info = ['None']
refiner_info = ['None']
normalizer_info = ['None']
sampler_names = sampler_registry.list_names()
for name in sampler_names:
sampler_info.append(name)
module_info["Sampler"] = sampler_info
refiner_names = space_refiner_registry.list_names()
for name in refiner_names:
refiner_info.append(name)
module_info["Refiner"] = refiner_info
pretrain_names = pretrain_registry.list_names()
for name in pretrain_names:
pretrain_info.append(name)
module_info["Pretrain"] = pretrain_info
model_names = model_registry.list_names()
for name in model_names:
model_info.append(name)
module_info["Model"] = model_info
acf_names = acf_registry.list_names()
for name in acf_names:
acf_info.append(name)
module_info["ACF"] = acf_info
normalizer_names = normalizer_registry.list_names()
for name in normalizer_names:
normalizer_info.append(name)
module_info["Normalizer"] = normalizer_info
return module_info
def search_dataset(self, search_method, dataset_name, dataset_info):
if search_method == 'Fuzzy':
datasets_list = {"isExact": False,
"datasets": list(self.data_manager.search_datasets_by_name(dataset_name))}
elif search_method == 'Hash':
dataset_detail_info = self.data_manager.get_dataset_info(dataset_name)
if dataset_detail_info:
datasets_list = {"isExact": True, "datasets": dataset_detail_info['additional_config']}
else:
raise ValueError("Dataset not found")
elif search_method == 'LSH':
datasets_list = {"isExact": False,
"datasets":list(self.data_manager.search_similar_datasets(dataset_name, dataset_info))}
else:
raise ValueError("Invalid search method")
return datasets_list
def convert_metadata(self, conditions):
type_map = {
"NumVars": int,
"NumObjs": int,
"Workload": int,
"Seed": int,
# Add other fields as necessary
}
converted_conditions = {}
for key, value in conditions.items():
if key in type_map:
try:
# Convert the value according to its expected type
if type_map[key] == int:
converted_conditions[key] = int(value)
elif type_map[key] == float:
converted_conditions[key] = float(value)
elif type_map[key] == bool:
converted_conditions[key] = value.lower() in ['true', '1', 't', 'yes', 'y']
else:
converted_conditions[key] = value # Assume string or no conversion needed
except ValueError:
raise ValueError(f"Invalid value for {key}: {value}")
else:
# If no specific type is expected, assume string
converted_conditions[key] = value
return converted_conditions
def comparision_search(self, conditions):
conditions = {k: v for k, v in conditions.items() if v}
conditions = self.convert_metadata(conditions)
key_map = {
"TaskName": "problem_name",
"NumVars": "dimensions",
"NumObjs": "objectives",
"Fidelity": "fidelities",
"Workload": "workloads",
"Seed": "seeds",
"Refiner": "space_refiner",
"Sampler": "sampler",
"Pretrain": "pretrain",
"Model": "model",
"ACF": "acf",
"Normalizer": "normalizer"
}
# change key in conditions to match the key in database
conditions = {key_map[k]: v for k, v in conditions.items() if k in key_map}
return self.data_manager.db.search_tables_by_metadata(conditions)
def set_metadata(self, dataset_names):
self.running_config.set_metadata(dataset_names)
pass
def receive_tasks(self, tasks_info):
tasks = {}
self.tasks_info = tasks_info
workloads = []
for task in tasks_info:
for item in task["workloads"].split(","):
try:
workloads.append(int(item))
except ValueError:
workloads.append(item)
tasks[task["name"]] = {
"budget_type": task["budget_type"],
"budget": int(task["budget"]),
"workloads": workloads,
"params": {"input_dim": int(task["num_vars"])},
}
self.running_config.set_tasks(tasks)
return
def receive_optimizer(self, optimizer_info):
self.running_config.set_optimizer(optimizer_info)
return
def receive_metadata(self, metadata_info):
print(metadata_info)
self.running_config.set_metadata(metadata_info)
return
def get_all_datasets(self):
all_tables = self.data_manager.db.get_table_list()
return [self.data_manager.db.query_dataset_info(table) for table in all_tables]
def get_experiment_datasets(self):
experiment_tables = self.data_manager.db.get_table_list()
return [(experiment_tables[table_id],self.data_manager.db.query_dataset_info(table)) for table_id, table in enumerate(experiment_tables)]
def construct_dataset_info(self, task_set, running_config, seed):
dataset_info = {}
dataset_info["variables"] = [
{"name": var.name, "type": var.type, "range": var.range}
for var_name, var in task_set.get_cur_searchspace_info().items()
]
dataset_info["objectives"] = [
{"name": name, "type": type}
for name, type in task_set.get_curobj_info().items()
]
dataset_info["fidelities"] = [
{"name": var.name, "type": var.type, "range": var.range}
for var_name, var in task_set.get_cur_fidelity_info().items()
]
# Simplify dataset name construction
timestamp = int(time.time())
dataset_name = f"{task_set.get_curname()}_w{task_set.get_cur_workload()}_s{seed}_{timestamp}"
dataset_info['additional_config'] = {
"problem_name": task_set.get_curname(),
"dim": len(dataset_info["variables"]),
"obj": len(dataset_info["objectives"]),
"fidelity": ', '.join([d['name'] for d in dataset_info["fidelities"] if 'name' in d]) if dataset_info["fidelities"] else '',
"workloads": task_set.get_cur_workload(),
"budget_type": task_set.get_cur_budgettype(),
"initial_number": running_config.optimizer['SamplerInitNum'],
"budget": task_set.get_cur_budget(),
"seeds": seed,
"SpaceRefiner": running_config.optimizer['SpaceRefiner'],
"Sampler": running_config.optimizer['Sampler'],
"Pretrain": running_config.optimizer['Pretrain'],
"Model": running_config.optimizer['Model'],
"ACF": running_config.optimizer['ACF'],
"Normalizer": running_config.optimizer['Normalizer'],
"DatasetSelector": f"SpaceRefiner-{running_config.optimizer['SpaceRefinerDataSelector']}, \
Sampler - {running_config.optimizer['SamplerDataSelector']}, \
Pretrain - {running_config.optimizer['PretrainDataSelector']}, \
Model - {running_config.optimizer['ModelDataSelector']}, \
ACF-{running_config.optimizer['ACFDataSelector']}, \
Normalizer - {running_config.optimizer['NormalizerDataSelector']}",
"metadata": running_config.metadata if running_config.metadata else [],
}
return dataset_info, dataset_name
def get_metadata(self, module_name):
if len(self.running_config.metadata[module_name]):
metadata = {}
metadata_info = {}
for dataset_name in self.running_config.metadata[module_name]:
metadata[dataset_name] = self.data_manager.db.select_data(dataset_name)
metadata_info[dataset_name] = self.data_manager.db.query_dataset_info(dataset_name)
return metadata, metadata_info
else:
return {}, {}
def save_data(self, dataset_name, parameters, observations, iteration):
# Build one row per sample: parameters, observations, and the batch id.
data = []
for params, obs in zip(parameters, observations):
row = dict(params)
row.update(obs)
row['batch'] = iteration
data.append(row)
self.data_manager.db.insert_data(dataset_name, data)
def remove_dataset(self, dataset_name):
if isinstance(dataset_name, str):
self.data_manager.db.remove_table(dataset_name)
elif isinstance(dataset_name, list):
for name in dataset_name:
self.data_manager.db.remove_table(name)
else:
raise ValueError("Invalid dataset name")
def run_optimize(self, seeds):
# Create a separate process for each seed
process_list = []
for seed in seeds:
p = Process(target=self._run_optimize_process, args=(int(seed),))
process_list.append(p)
p.start()
for p in process_list:
p.join()
def _run_optimize_process(self, seed):
# Each process constructs its own DataManager
try:
import os
pid = os.getpid()
self.process_info[pid] = {'status': 'running', 'seed': seed, 'budget': None, 'task': None, 'iteration': 0, 'dataset_name': None, 'progress':0}
logger.info(f"Start process #{pid}")
# Instantiate problems and optimizer
task_set = InstantiateProblems(self.running_config.tasks, seed)
optimizer = ConstructOptimizer(self.running_config.optimizer, seed)
dataselector = ConstructSelector(self.running_config.optimizer, seed)
while (task_set.get_unsolved_num()):
search_space = task_set.get_cur_searchspace()
dataset_info, dataset_name = self.construct_dataset_info(task_set, self.running_config, seed=seed)
self.data_manager.create_dataset(dataset_name, dataset_info, overwrite=True)
self.update_process_info(pid, {'dataset_name': dataset_name, 'task': task_set.get_curname(), 'budget': task_set.get_cur_budget()})
optimizer.link_task(task_name=task_set.get_curname(), search_space=search_space)
metadata, metadata_info = self.get_metadata('SpaceRefiner')
if dataselector['SpaceRefinerDataSelector']:
metadata, metadata_info = dataselector['SpaceRefinerDataSelector'].fetch_data(dataset_info)
optimizer.search_space_refine(metadata, metadata_info)
metadata, metadata_info = self.get_metadata('Sampler')
if dataselector['SamplerDataSelector']:
metadata, metadata_info = dataselector['SamplerDataSelector'].fetch_data(dataset_info)
samples = optimizer.sample_initial_set(metadata, metadata_info)
parameters = [search_space.map_to_design_space(sample) for sample in samples]
observations = task_set.f(parameters)
self.save_data(dataset_name, parameters, observations, self.process_info[pid]['iteration'])
optimizer.observe(samples, observations)
# Pretrain
metadata, metadata_info = self.get_metadata('Pretrain')
if dataselector['PretrainDataSelector']:
metadata, metadata_info = dataselector['PretrainDataSelector'].fetch_data(dataset_info)
optimizer.pretrain(metadata, metadata_info)
metadata, metadata_info = self.get_metadata('Model')
if dataselector['ModelDataSelector']:
metadata, metadata_info = dataselector['ModelDataSelector'].fetch_data(dataset_info)
optimizer.meta_fit(metadata, metadata_info)
while (task_set.get_rest_budget()):
optimizer.fit()
suggested_samples = optimizer.suggest()
parameters = [search_space.map_to_design_space(sample) for sample in suggested_samples]
observations = task_set.f(parameters)
if observations is None:
break
self.save_data(dataset_name, parameters, observations, self.process_info[pid]['iteration'])
optimizer.observe(suggested_samples, observations)
cur_iter = self.process_info[pid]['iteration']
self.update_process_info(pid, {'iteration': cur_iter + 1})
self.update_process_info(pid, {'progress': 100 * (task_set.get_cur_budget() - task_set.get_rest_budget()) / task_set.get_cur_budget()})
logger.info(f"PID {pid}: Seed {seed}, Task {task_set.get_curname()}, Iteration {self.process_info[pid]['iteration']}")
task_set.roll()
except Exception as e:
logger.error(f"Error in process {pid}: {str(e)}")
raise e
finally:
self.update_process_info(pid, {'status': 'completed'})
def terminate_task(self, pid):
with self.lock:
if pid in self.process_info:
dataset_name = self.process_info[pid].get('dataset_name')
try:
os.kill(pid, signal.SIGTERM)
logger.info(f"Process {pid} has been terminated.")
except Exception as e:
logger.error(f"Failed to terminate process {pid}: {str(e)}")
if dataset_name:
try:
self.data_manager.remove_dataset(dataset_name)
logger.info(f"Dataset {dataset_name} associated with process {pid} has been deleted.")
except Exception as e:
logger.error(f"Failed to delete dataset {dataset_name}: {str(e)}")
del self.process_info[pid]
else:
logger.warning(f"No such process {pid} found in process info.")
def update_process_info(self, pid, updates):
with self.lock:
temp_info = self.process_info[pid].copy()
temp_info.update(updates)
self.process_info[pid] = temp_info
def get_all_process_info(self):
return dict(self.process_info)
def get_box_plot_data(self, task_names):
all_data = {}
for group_id, group in enumerate(task_names):
all_data[str(group_id)] = []
for task_name in group:
data = self.data_manager.db.select_data(task_name)
table_info = self.data_manager.db.query_dataset_info(task_name)
objectives = table_info["objectives"]
obj = objectives[0]["name"]
                try:
                    best_obj = min([d[obj] for d in data])
                except (KeyError, ValueError):
                    # Skip tasks with no recorded values for this objective
                    continue
                all_data[str(group_id)].append(best_obj)
return all_data
def get_report_charts(self, task_name):
all_data = self.data_manager.db.select_data(task_name)
table_info = self.data_manager.db.query_dataset_info(task_name)
objectives = table_info["objectives"]
ranges = [tuple(var['range']) for var in table_info["variables"]]
initial_number = table_info["additional_config"]["initial_number"]
obj = objectives[0]["name"]
obj_type = objectives[0]["type"]
obj_data = [data[obj] for data in all_data]
var_data = [[data[var["name"]] for var in table_info["variables"]] for data in all_data]
variables = [var["name"] for var in table_info["variables"]]
ret = {}
ret.update(self.construct_footprint_data(task_name, var_data, ranges, initial_number))
ret.update(self.construct_trajectory_data(task_name, obj_data, obj_type))
# ret.update(self.construct_importance_data(task_name, var_data, obj_data, variables))
return ret
def get_report_traj(self, task_name):
all_data = self.data_manager.db.select_data(task_name)
table_info = self.data_manager.db.query_dataset_info(task_name)
objectives = table_info["objectives"]
obj = objectives[0]["name"]
obj_type = objectives[0]["type"]
obj_data = [data[obj] for data in all_data]
ret = {}
ret.update(self.construct_trajectory_data(task_name, obj_data, obj_type))
return ret
def construct_footprint_data(self, name, var_data, ranges, initial_number):
# Initialize the list to store trajectory data and the best value seen so far
fp = FootPrint(var_data, ranges)
fp.calculate_distances()
fp.get_mds()
scatter_data = {'Initial vectors': fp._reduced_data[:initial_number], 'Decision vectors': fp._reduced_data[initial_number:len(fp.X)], 'Boundary vectors': fp._reduced_data[len(fp.X):]}
# scatter_data = {}
return {"ScatterData": scatter_data}
def construct_statistic_trajectory_data(self, task_names):
all_data = []
for group_id, group in enumerate(task_names):
min_data = {'name': f'Algorithm{group_id + 1}', 'average': [], 'uncertainty': []}
res = []
max_length = 0
for task_name in group:
data = self.data_manager.db.select_data(task_name)
table_info = self.data_manager.db.query_dataset_info(task_name)
objectives = table_info["objectives"]
obj = objectives[0]["name"]
obj_data = [d[obj] for d in data]
acc_obj_data = np.minimum.accumulate(obj_data).flatten().tolist()
res.append(acc_obj_data)
if len(acc_obj_data) > max_length:
max_length = len(acc_obj_data)
            # Compute the median and standard deviation at each evaluation index
for i in range(max_length):
current_data = [r[i] for r in res if i < len(r)]
median = np.median(current_data)
std = np.std(current_data)
min_data['average'].append({'FEs': i + 1, 'y': median})
min_data['uncertainty'].append({'FEs': i + 1, 'y': [median - std, median + std]})
all_data.append(min_data)
return all_data
def construct_trajectory_data(self, name, obj_data, obj_type="minimize"):
# Initialize the list to store trajectory data and the best value seen so far
trajectory = []
best_value = float("inf") if obj_type == "minimize" else -float("inf")
best_values_so_far = []
# Loop through each function evaluation
for index, current_value in enumerate(obj_data, start=1):
# Update the best value based on the objective type
if obj_type == "minimize":
if current_value < best_value:
best_value = current_value
else: # maximize
if current_value > best_value:
best_value = current_value
# Append the best value observed so far to the list
best_values_so_far.append(best_value)
trajectory.append({"FEs": index, "y": best_value})
uncertainty = []
for data_point in trajectory:
base_value = data_point["y"]
uncertainty_range = [base_value, base_value]
uncertainty.append({"FEs": data_point["FEs"], "y": uncertainty_range})
trajectory_data = {
"name": name,
"average": trajectory,
"uncertainty": uncertainty,
}
return {"TrajectoryData": [trajectory_data]}
def construct_importance_data(self, name, var_data, obj_data, variables):
# plot_network(np.array(var_data), np.array(obj_data), variables)
return {}
def get_configuration(self):
configuration_info = {}
configuration_info["tasks"] = self.tasks_info
configuration_info["optimizer"] = self.running_config.optimizer
configuration_info["datasets"] = self.running_config.metadata
return configuration_info
================================================
FILE: transopt/agent/testood.py
================================================
import logging
import time
from typing import Dict, Union
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import tqdm
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from transopt.agent.registry import problem_registry
from transopt.benchmark.problem_base.non_tab_problem import NonTabularProblem
from transopt.optimizer.sampler.random import RandomSampler
from transopt.space.fidelity_space import FidelitySpace
from transopt.space.search_space import SearchSpace
from transopt.space.variable import *
from transopt.utils.openml_data_manager import OpenMLHoldoutDataManager
from transopt.datamanager.database import Database
from services import Services
from transopt.benchmark.HPO.HPOCNN import *
import os
import sys
import unittest
from pathlib import Path
def plot_acc_scatter(train_acc, test_acc):
# Create a scatter plot
plt.scatter(train_acc, test_acc, label='Accuracy Points')
# Plot the diagonal line
min_val = min(min(train_acc), min(test_acc))
max_val = max(max(train_acc), max(test_acc))
plt.plot([min_val, max_val], [min_val, max_val], 'r--', label='Diagonal Line')
# Add labels and title
plt.xlabel('Train Accuracy')
plt.ylabel('Test Accuracy')
plt.title('Train vs Test Accuracy')
plt.legend()
# Show the plot
plt.savefig('./train vs test accuracy.png')
current_dir = Path(__file__).resolve().parent
package_dir = current_dir.parent
sys.path.insert(0, str(package_dir))
def setUp():
db = Database("database.db")
table_name = "test_table"
def list_pth_files(directory):
# Create an empty list to store .pth file paths
pth_files = []
# Walk through the directory
for root, dirs, files in os.walk(directory):
for file in files:
# Check if the file ends with .pth
if file.endswith('.pth'):
# Create the full path to the file
file_path = os.path.join(root, file)
# Add the file path to the list
pth_files.append(file_path)
return pth_files
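For reference, the `os.walk` scan above can be expressed with `pathlib.Path.rglob`; a minimal equivalent sketch (`find_files` is a hypothetical name, not used elsewhere in the repository):

```python
from pathlib import Path

def find_files(directory, suffix=".pth"):
    """Recursively collect paths of files ending with `suffix`."""
    return sorted(str(p) for p in Path(directory).rglob(f"*{suffix}"))
```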
if __name__ == "__main__":
services = Services(None, None, None)
task_name = []
parameters = []
# tables = services.get_experiment_datasets()
# for table in tables:
# print(table[1]['data_number'])
# if table[1]['data_number'] == 100:
# task_name = table[0]
# print(task_name)
# all_data = services.data_manager.db.select_data(task_name)
# table_info = services.data_manager.db.query_dataset_info(task_name)
# objectives = table_info["objectives"]
# ranges = [tuple(var['range']) for var in table_info["variables"]]
# initial_number = table_info["additional_config"]["initial_number"]
# obj = objectives[0]["name"]
# obj_type = objectives[0]["type"]
# obj_data = [data[obj] for data in all_data]
# max_id = np.argmax(obj_data)
# var_data = [[data[var["name"]] for var in table_info["variables"]] for data in all_data]
# variables = [var["name"] for var in table_info["variables"]]
# ret = {}
# traj = services.construct_trajectory_data(task_name, obj_data, obj_type="maximize")
# best_var = var_data[max_id]
# lr = np.exp2(best_var[0])
# momentum = best_var[1]
# weight_decay = np.exp2(best_var[2])
# parameters.append((lr, momentum, weight_decay))
if torch.cuda.is_available():
device = torch.device("cuda")
torch.cuda.set_device(1)
else:
device = torch.device("cpu")
trainset = datasets.MNIST(
root="./data", train=True, download=True, transform=transforms.Compose(
[
BGRed(),
transforms.ToTensor(),
transforms.Resize((32, 32)),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
]
)
)
testset = datasets.MNIST(
root="./data", train=False, download=True, transform=transforms.Compose(
[
BGRed(),
transforms.ToTensor(),
transforms.Resize((32, 32)),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
]
)
)
trainloader = torch.utils.data.DataLoader(
trainset, batch_size=64, shuffle=True
)
testloader = torch.utils.data.DataLoader(
testset, batch_size=64, shuffle=False
)
epochs = 30
batch_size = 64
# lr = 0.0017607943222948076
# momentum = 0.6997583600209312
# weight_decay = 0.004643925899318933
# lr = parameters[0][0]
# momentum = parameters[0][1]
# weight_decay = parameters[0][2]
# print(lr, momentum, weight_decay)
directory = './temp_model/CNN_101/' # Replace with the path to your directory
pth_files = list_pth_files(directory)
train_acc = []
test_acc = []
net = Learner(target_classes=10).to(device)
for model_name in pth_files:
print(model_name)
net.load_state_dict(torch.load(f'{model_name}'))
# criterion = nn.NLLLoss()
# optimizer = optim.SGD(
# net.parameters(),
# lr=lr,
# momentum=momentum,
# weight_decay = weight_decay,
# )
# start_time = time.time()
correct = 0
total = 0
with torch.no_grad():
for data in trainloader:
images, labels = data[0].to(device), data[1].to(device)
outputs = net(images)
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
accuracy = correct / total
print("Training Accuracy: %.2f %%" % (100 * accuracy))
train_acc.append(accuracy * 100)
# print("Epoch %d, Loss: %.3f" % (e + 1, running_loss / len(trainloader)))
correct = 0
total = 0
with torch.no_grad():
for data in testloader:
images, labels = data[0].to(device), data[1].to(device)
outputs = net(images)
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
accuracy = correct / total
end_time = time.time()
test_acc.append(accuracy * 100)
print("Test Accuracy: %.2f %%" % (100 * accuracy))
plot_acc_scatter(train_acc, test_acc)
# model_save_path = './temp_model/model.pth'
# torch.save(net.state_dict(), model_save_path)
================================================
FILE: transopt/analysis/compile_tex.py
================================================
import os
import subprocess
import shutil
def compile_tex(tex_path, output_folder):
    # Save the current working directory
original_cwd = os.getcwd()
    # Convert paths to absolute paths
tex_path = os.path.abspath(tex_path)
output_folder = os.path.abspath(output_folder)
    # Split into folder and file name
folder, filename = os.path.split(tex_path)
name, _ = os.path.splitext(filename)
    # Switch to the folder containing the .tex file
os.chdir(folder)
try:
        # Compile the .tex file
subprocess.run(['pdflatex', filename], check=True)
        # Crop the PDF
pdf_path = os.path.join(folder, name + '.pdf')
cropped_pdf_path = pdf_path.replace('.pdf', '-crop.pdf')
subprocess.run(['pdfcrop', pdf_path, cropped_pdf_path], check=True)
        # Move the cropped PDF to the output folder, dropping the -crop suffix
output_pdf_path = os.path.join(output_folder, name + '.pdf')
shutil.move(cropped_pdf_path, output_pdf_path)
except subprocess.CalledProcessError as e:
        print(f"Command failed: {e}")
finally:
        # Switch back to the original working directory
os.chdir(original_cwd)
        # Remove the .aux and .log files and the uncropped PDF
aux_path = os.path.join(folder, name + '.aux')
log_path = os.path.join(folder, name + '.log')
if os.path.exists(aux_path):
os.remove(aux_path)
if os.path.exists(log_path):
os.remove(log_path)
if os.path.exists(pdf_path):
os.remove(pdf_path)
================================================
FILE: transopt/analysis/effect_size.py
================================================
import os
import json
import numpy as np
from transopt.utils.sk import Rx
from matplotlib import pyplot as plt
plot_dim = 7
file_path = f"/home/gsfall/data_files/synthetic/{plot_dim}d"
# file_path = f"/home/gsfall/data_files/SVM"
plot_tasks = ["Discus", "GriewankRosenbrock", "Rastrigin", "Rosenbrock", "Schwefel"]
# plot_tasks = ["SVM"]
rank = {}
for plot_task in plot_tasks:
data_dict = {}
for file_name in os.listdir(file_path):
if file_name.endswith(".json"):
parts = file_name.split("_")
task = parts[0]
method = "_".join(parts[1:]).split(".")[0]
if task == plot_task:
with open(os.path.join(file_path, file_name), "r") as f:
data = json.load(f)
data_dict[method] = data["m"]
Rx_data = Rx.data(**data_dict)
result = Rx.sk(Rx_data)
for r in result:
if r.rx in rank:
rank[r.rx].append(r.rank)
else:
rank[r.rx] = [r.rank]
file_name = "rank.json"
with open(os.path.join(file_path, file_name), "w") as json_file:
json.dump(rank, json_file)
================================================
FILE: transopt/analysis/mds.py
================================================
import numpy as np
from sklearn.manifold import MDS
from scipy.spatial.distance import pdist, squareform
import matplotlib.pyplot as plt
import itertools
class FootPrint:
def __init__(self, X, range):
self.X = X
self.ranges = range
self.boundary_points = self.get_random_boundary_points(0)
self.config_ids = np.arange(0, len(self.X) + len(self.boundary_points)).tolist()
self.n_configs = len(self.config_ids)
        self._distances = None
self._reduced_data = None
def calculate_distances(self):
"""
Calculate pairwise distances between configurations.
Parameters:
X (np.ndarray): Encoded data matrix.
Returns:
np.ndarray: Pairwise distances matrix.
"""
distances = np.zeros((self.n_configs, self.n_configs))
configs = np.vstack((self.X, self.boundary_points))
for i in range(self.n_configs):
for j in range(i + 1, self.n_configs):
distances[i, j] = distances[j, i] = np.linalg.norm(configs[i] - configs[j])
self._distances = distances
def init_distances(self, config_ids, exclude_configs=False):
"""
Initialize pairwise distances between configurations.
Parameters:
X (np.ndarray): Encoded data matrix.
config_ids (List[int]): Corresponding config_ids.
exclude_configs (bool): Whether to exclude the passed X. Default is False.
Returns:
np.ndarray: Pairwise distances matrix.
"""
if not exclude_configs:
self.calculate_distances()
else:
return np.zeros((0, 0))
def update_distances(self, X, distances, config, rejection_threshold=0.0):
"""
Update pairwise distances with a new configuration.
Parameters:
X (np.ndarray): Encoded data matrix.
distances (np.ndarray): Pairwise distances matrix.
config (np.ndarray): New configuration to add.
rejection_threshold (float): Threshold for rejecting the config. Default is 0.0.
Returns:
bool: Whether the config was rejected or not.
"""
n_configs = X.shape[0]
new_distances = np.zeros((n_configs + 1, n_configs + 1))
rejected = False
if n_configs > 0:
new_distances[:n_configs, :n_configs] = distances[:, :]
for j in range(n_configs):
d = np.linalg.norm(X[j] - config)
if rejection_threshold is not None:
if d < rejection_threshold:
rejected = True
break
new_distances[n_configs, j] = new_distances[j, n_configs] = d
if not rejected:
X = np.vstack((X, config))
            distances = new_distances
return rejected
def get_random_boundary_points(self, num_samples):
num_dims = len(self.ranges)
combinations = list(itertools.product(*self.ranges))
# random_boundary_indices = np.random.choice(len(combinations), num_samples, replace=False)
# random_boundary_points = [combinations[i] for i in random_boundary_indices]
return np.array(combinations)
def get_mds(self):
if self._distances is None:
raise RuntimeError("You need to call `calculate` first.")
mds = MDS(n_components=2, dissimilarity="precomputed", random_state=0)
self._reduced_data = mds.fit_transform(self._distances).tolist()
def plot_embedding(self):
"""
Plot the low-dimensional embedding.
"""
plt.figure(figsize=(8, 6))
        # _reduced_data is stored as a nested list; convert before 2D indexing
        reduced = np.asarray(self._reduced_data)
        plt.scatter(reduced[:len(self.X), 0], reduced[:len(self.X), 1], c='b', label='MDS Embedding')
        plt.scatter(reduced[len(self.X):, 0], reduced[len(self.X):, 1], c='r', marker='x', label='Boundary points')
plt.xlabel('Component 1')
plt.ylabel('Component 2')
plt.title('MDS Embedding')
plt.legend()
plt.grid(True)
plt.show()
if __name__ == '__main__':
    # Example data
X = np.random.rand(100, 5)
bounds = [(0, 1), (0, 1),(0,1), (0,1), (0,1)]
fp = FootPrint(X, bounds)
fp.calculate_distances()
fp.get_mds()
fp.plot_embedding()
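The O(n^2) Python loop in `calculate_distances` can be vectorized with broadcasting (or with the `pdist`/`squareform` pair already imported above); a standalone sketch, with `pairwise_euclidean` as an illustrative name:

```python
import numpy as np

def pairwise_euclidean(configs):
    """Full symmetric Euclidean distance matrix via broadcasting."""
    X = np.asarray(configs, dtype=float)
    diff = X[:, None, :] - X[None, :, :]   # shape (n, n, d)
    return np.sqrt((diff ** 2).sum(axis=-1))
```

The result matches the element-by-element loop and feeds directly into `MDS(dissimilarity="precomputed")`.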
================================================
FILE: transopt/analysis/parameter_network.py
================================================
import os
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from itertools import combinations
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
def calculate_importances(X, y):
"""
Calculates and returns parameter importances.
"""
model = DecisionTreeRegressor()
model.fit(X, y[:, np.newaxis])
feature_importances = model.feature_importances_
return feature_importances
def calculate_interaction(X, y):
num_parameters = X.shape[1]
h_matrix = np.zeros((num_parameters, num_parameters))
    # Fit a single-feature model for each variable
single_models = []
for i in range(num_parameters):
model = RandomForestRegressor(n_estimators=50, random_state=42)
model.fit(X[:, [i]], y)
single_models.append(model)
    # For each pair of features, compute the H^2 interaction statistic
for (i, j) in combinations(range(num_parameters), 2):
model_jk = RandomForestRegressor(n_estimators=50, random_state=42)
model_jk.fit(X[:, [i, j]], y)
f_jk = model_jk.predict(X[:, [i, j]])
f_j = single_models[i].predict(X[:, [i]])
f_k = single_models[j].predict(X[:, [j]])
numerator = np.sqrt(np.sum((f_jk - f_j - f_k) ** 2))
h_matrix[i, j] = numerator
h_matrix[j, i] = h_matrix[i, j]
mean = np.mean(h_matrix)
std = np.std(h_matrix)
normalized_matrix = (h_matrix - mean) / std
scaled_matrix = 1 / (1 + np.exp(-normalized_matrix))
return scaled_matrix
def plot_network(X, y, nodes):
G = nx.Graph()
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15, 9))
nodes_weight = calculate_importances(X, y)
for node, weight in zip(nodes, nodes_weight):
G.add_node(node, weight=weight)
edges_weight = calculate_interaction(X, y)
for i in range(len(nodes)):
for j in range(i + 1, len(nodes)):
            weight = np.random.uniform(0, 1)  # Random edge weight in [0, 1]
G.add_edge(nodes[i], nodes[j], weight=weight)
# for i in range(5):
# for j in range(i + 1, 5):
# weight = edges_weight[i, j]
# G.add_edge(nodes[i], nodes[j], weight=weight)
    # Place the nodes in a circular layout
pos = nx.circular_layout(G)
    # Create the color maps
node_cmap = plt.cm.Greens
edge_cmap = plt.cm.Blues
    # Node colors are mapped from their weights
node_color = [node_cmap(data['weight']) for v, data in G.nodes(data=True)]
node_size = [data['weight'] * 2000 + 1000 for v, data in G.nodes(data=True)]
    node_alpha = [data['weight'] for v, data in G.nodes(data=True)]  # Transparency follows the weight
    # Draw the network
edges = G.edges(data=True)
nx.draw_networkx_nodes(G, pos, node_color=node_color, node_size=node_size, alpha=node_alpha)
nx.draw_networkx_labels(G, pos, font_color='white', font_size=16)
    # Draw each edge separately with its own color and alpha
for u, v, data in edges:
color = edge_cmap(data['weight'])
nx.draw_networkx_edges(G, pos, edgelist=[(u, v)], width=3, alpha=data['weight'], edge_color=[color])
fig.set_facecolor("None")
ax.set_facecolor("#191C36")
# ax.axis('off')
path = os.getcwd()
save_path = os.path.join(path, "webui/src/pictures/parameter_network.png")
plt.savefig(save_path, bbox_inches='tight')
plt.clf()
plt.close()
    # Show the figure
# plt.show()
if __name__ == "__main__":
np.random.seed(0)
X = np.random.normal(0, 1, (100, 5)) # 5个特征
y = 5 * X[:, 0] * X[:, 1] + 3 * X[:, 2] + X[:, 3] + np.random.normal(0, 0.5, 100)
plot_network(X, y, nodes=['X1', 'X2', 'X3', 'X4', 'X5'])
================================================
FILE: transopt/analysis/table.py
================================================
import numpy as np
from collections import defaultdict
from transopt.utils.sk import Rx
import scipy
import os
from multiprocessing import Process, Manager
from transopt.analysis.table_to_latex import matrix_to_latex
from transopt.analysis.compile_tex import compile_tex
from transopt.agent.services import Services
class Result():
def __init__(self):
self.X = None
self.Y = None
self.best_X = None
self.best_Y = None
def get_results(task_names):
manager = Manager()
task_queue = manager.Queue()
result_queue = manager.Queue()
db_lock = manager.Lock()
services = Services(task_queue, result_queue, db_lock)
results = {}
methods = []
tasks = []
for group_id, group in enumerate(task_names):
for task_name in group:
r = Result()
table_info = services.data_manager.db.query_dataset_info(task_name)
task = table_info['additional_config']['problem_name']
method = table_info['additional_config']['Model']
seed = table_info['additional_config']['seeds']
if method not in methods:
methods.append(method)
if task not in tasks:
tasks.append(task)
all_data = services.data_manager.db.select_data(task_name)
objectives = table_info["objectives"]
obj = objectives[0]["name"]
obj_data = [data[obj] for data in all_data]
var_data = [[data[var["name"]] for var in table_info["variables"]] for data in all_data]
r.X = np.array(var_data)
r.Y = np.array(obj_data)
best_id = np.argmin(r.Y)
r.best_X = r.X[best_id]
r.best_Y = r.Y[best_id]
if task not in results:
results[task] = defaultdict(dict)
if method not in results[task]:
results[task][method] = defaultdict(dict)
results[task][method][seed] = r
return results, methods, tasks
def record_mean_std(task_names, save_path, **kwargs):
# Similar to record_mean_std function in PeerComparison.py
res_mean = {}
res_std = {}
res_sig = {}
results, methods, tasks = get_results(task_names)
for task_name, task_r in results.items():
result_mean = []
result_std = []
data = {}
data_mean = {}
for method, method_r in task_r.items():
best = []
for seed, result_obj in method_r.items():
best.append(result_obj.best_Y)
data[method] = best.copy()
data_mean[method] = (np.mean(best), np.std(best))
result_mean.append(np.mean(best))
result_std.append(np.std(best))
res_mean[task_name] = result_mean
res_std[task_name] = result_std
rst_m = {}
sorted_dic = sorted(data_mean.items(), key=lambda kv: (kv[1][0]))
for method in methods:
if method == sorted_dic[0][0]:
rst_m[method] = '-'
continue
s, p = scipy.stats.mannwhitneyu(data[sorted_dic[0][0]], data[method], alternative='two-sided')
if p < 0.05:
rst_m[method] = '+'
else:
rst_m[method] = '-'
res_sig[task_name] = rst_m
latex_code = matrix_to_latex({'mean':res_mean, 'std':res_std, 'significance':res_sig}, tasks, methods,
caption='Performance comparisons of the quality of solutions obtained by different algorithms.')
save_path = os.path.join(save_path, 'Overview')
os.makedirs(save_path, exist_ok=True)
tex_save_path = os.path.join(save_path, 'tex')
os.makedirs(tex_save_path, exist_ok=True)
table_path = os.path.join(save_path, 'Table')
os.makedirs(table_path, exist_ok=True)
with open(os.path.join(tex_save_path, 'compare_mean.tex'), 'w') as f:
f.write(latex_code)
    try:
        compile_tex(os.path.join(tex_save_path, 'compare_mean.tex'), table_path)
    except Exception:
        # pdflatex/pdfcrop may be unavailable; keep the .tex output regardless
        pass
print(f"LaTeX code has been saved to {tex_save_path}")
if __name__ == "__main__":
task_names = [['Sphere_w1_s1_1715591439', 'Sphere_w1_s1_1715592120']]
save_path = '/home/gsfall'
record_mean_std(task_names, save_path)
================================================
FILE: transopt/analysis/table_to_latex.py
================================================
import numpy as np
from typing import Union, Dict
def matrix_to_latex(Data: Dict, col_names, row_names, caption, oder="min"):
mean = Data["mean"]
std = Data["std"]
significance = Data["significance"]
num_cols = len(mean.keys())
num_rows = len(row_names)
if len(col_names) != num_cols or len(row_names) != num_rows:
raise ValueError(
"Mismatch between matrix dimensions and provided row/column names."
)
latex_code = []
    # Document class and packages
latex_code.append("\\documentclass{article}")
latex_code.append("\\usepackage{geometry}")
latex_code.append("\\geometry{a4paper, margin=1in}")
latex_code.append("\\usepackage{graphicx}")
latex_code.append("\\usepackage{colortbl}")
latex_code.append("\\usepackage{booktabs}")
latex_code.append("\\usepackage{threeparttable}")
latex_code.append("\\usepackage{caption}")
latex_code.append("\\usepackage{xcolor}")
latex_code.append("\\pagestyle{empty}")
    # Begin the document
latex_code.append("\\begin{document}")
latex_code.append("")
latex_code.append("\\begin{table*}[t!]")
latex_code.append(" \\scriptsize")
latex_code.append(" \\centering")
latex_code.append(f" \\caption{{{caption}}}")
latex_code.append(" \\resizebox{1.0\\textwidth}{!}{")
latex_code.append(" \\begin{tabular}{c|" + "".join(["c"] * (num_rows)) + "}")
latex_code.append(" \\hline")
# Adding column names
col_header = " & ".join([""] + row_names) + " \\\\"
latex_code.append(" " + col_header)
latex_code.append(" \\hline")
# Adding rows
for i in range(num_cols):
str_data = []
for j in range(num_rows):
str_format = ""
if oder == "min":
if mean[col_names[i]][j] == np.min(mean[col_names[i]]):
                    str_format += "\\cellcolor[rgb]{ .682, .667, .667}\\textbf{"
str_format += "%.3E(%.3E)" % (
float(mean[col_names[i]][j]),
std[col_names[i]][j],
)
str_format += "}"
str_data.append(str_format)
else:
if significance[col_names[i]][row_names[j]] == "+":
str_data.append(
                            "%.3E(%.3E)$^\\dagger$"
% (float(mean[col_names[i]][j]), std[col_names[i]][j])
)
else:
str_data.append(
"%.3E(%.3E)"
% (float(mean[col_names[i]][j]), std[col_names[i]][j])
)
else:
if mean[col_names[i]][j] == np.max(mean[col_names[i]]):
                    str_format += "\\cellcolor[rgb]{ .682, .667, .667}\\textbf{"
str_format += "%.3E(%.3E)" % (
float(mean[col_names[i]][j]),
std[col_names[i]][j],
)
str_format += "}"
str_data.append(str_format)
else:
if significance[col_names[i]][row_names[j]] == "+":
str_data.append(
                            "%.3E(%.3E)$^\\dagger$"
% (float(mean[col_names[i]][j]), std[col_names[i]][j])
)
else:
str_data.append(
"%.3E(%.3E)"
% (float(mean[col_names[i]][j]), std[col_names[i]][j])
)
test_name = col_names[i] + col_names[i]
row_data = " & ".join(["\\texttt{" + f"{test_name}" + "}"] + str_data) + " \\\\"
latex_code.append(" " + row_data)
latex_code.append(" \\hline")
latex_code.append(" \\end{tabular}")
latex_code.append(" }")
latex_code.append(" \\begin{tablenotes}")
latex_code.append(" \\tiny")
latex_code.append(
" \\item The labels in the first column are the combination of the first letter of test problem and the number of variables, e.g., A4 is Ackley problem with $n=4$."
)
    latex_code.append(
        " \\item $^\\dagger$ indicates that the best algorithm is significantly better than the other one according to the Wilcoxon rank-sum (Mann-Whitney U) test at a 5\\% significance level."
    )
latex_code.append(" \\end{tablenotes}")
latex_code.append("\\end{table*}%")
latex_code.append("\\end{document}")
return "\n".join(latex_code)
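The cell convention used by `matrix_to_latex` is `mean(std)` in scientific notation, with the best entry shaded and bolded and significantly-worse entries marked with a dagger; a standalone sketch of that formatting rule (`format_cell` is an illustrative helper, not part of the module):

```python
def format_cell(mean, std, best=False, significant=False):
    """Render one table cell in the mean(std) convention used above."""
    cell = "%.3E(%.3E)" % (float(mean), float(std))
    if best:
        # Shade and bold the best-performing entry
        return r"\cellcolor[rgb]{ .682, .667, .667}\textbf{" + cell + "}"
    if significant:
        # Dagger marks entries significantly worse than the best
        return cell + r"$^\dagger$"
    return cell
```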
================================================
FILE: transopt/benchmark/CPD/__init__.py
================================================
from transopt.benchmark.CPD.PCM.pcm import PCM
from transopt.benchmark.CPD.Absolut.absolut import Absolut
================================================
FILE: transopt/benchmark/CSSTuning/Compiler.py
================================================
import numpy as np
from csstuning.compiler.compiler_benchmark import GCCBenchmark, LLVMBenchmark
from transopt.agent.registry import problem_registry
from transopt.benchmark.problem_base.non_tab_problem import NonTabularProblem
from transopt.space.fidelity_space import FidelitySpace
from transopt.space.search_space import SearchSpace
from transopt.space.variable import *
@problem_registry.register("Compiler_GCC")
class GCCTuning(NonTabularProblem):
problem_type = 'compiler'
workloads = GCCBenchmark.AVAILABLE_WORKLOADS
num_variables = 104
num_objectives = 3
fidelity = None
def __init__(self, task_name, budget_type, budget, seed, workload, knobs=None, **kwargs):
self.workload = workload or GCCBenchmark.AVAILABLE_WORKLOADS[0]
self.benchmark = GCCBenchmark(workload=self.workload)
all_knobs = self.benchmark.get_config_space()
self.knobs = {k: all_knobs[k] for k in (knobs or all_knobs)}
self.num_variables = len(self.knobs)
super().__init__(
task_name=task_name,
budget=budget,
budget_type=budget_type,
seed=seed,
workload=workload,
)
np.random.seed(seed)
def get_configuration_space(self):
variables = []
for knob_name, knob_details in self.knobs.items():
knob_type = knob_details["type"]
range_ = knob_details["range"]
if knob_type == "enum":
variables.append(Categorical(knob_name, range_))
elif knob_type == "integer":
variables.append(Integer(knob_name, range_))
return SearchSpace(variables)
def get_fidelity_space(self):
return FidelitySpace([])
def get_objectives(self) -> dict:
return {
"execution_time": "minimize",
"compilation_time": "minimize",
"file_size": "minimize",
# "maxrss": "minimize",
# "PAPI_TOT_CYC": "minimize",
# "PAPI_TOT_INS": "minimize",
# "PAPI_BR_MSP": "minimize",
# "PAPI_BR_PRC": "minimize",
# "PAPI_BR_CN": "minimize",
# "PAPI_MEM_WCY": "minimize",
}
def get_problem_type(self):
return self.problem_type
def objective_function(self, configuration: dict, fidelity = None, seed = None, **kwargs):
try:
perf = self.benchmark.run(configuration)
return {obj: perf.get(obj, 1e10) for obj in self.get_objectives()}
        except Exception:
            # On benchmark failure, return a large penalty value for every objective
            return {obj: 1e10 for obj in self.get_objectives()}
@problem_registry.register("Compiler_LLVM")
class LLVMTuning(NonTabularProblem):
problem_type = 'compiler'
workloads = LLVMBenchmark.AVAILABLE_WORKLOADS
num_variables = 82
num_objectives = 3
fidelity = None
def __init__(self, task_name, budget_type, budget, seed, workload, knobs=None, **kwargs):
self.workload = workload or LLVMBenchmark.AVAILABLE_WORKLOADS[0]
self.benchmark = LLVMBenchmark(workload=self.workload)
all_knobs = self.benchmark.get_config_space()
self.knobs = {k: all_knobs[k] for k in (knobs or all_knobs)}
self.num_variables = len(self.knobs)
super().__init__(
task_name=task_name,
budget=budget,
budget_type=budget_type,
seed=seed,
workload=workload,
)
np.random.seed(seed)
def get_configuration_space(self):
variables = []
for knob_name, knob_details in self.knobs.items():
knob_type = knob_details["type"]
range_ = knob_details["range"]
if knob_type == "enum":
variables.append(Categorical(knob_name, range_))
elif knob_type == "integer":
variables.append(Integer(knob_name, range_))
return SearchSpace(variables)
def get_fidelity_space(self):
return FidelitySpace([])
def get_objectives(self) -> dict:
return {
"execution_time": "minimize",
"compilation_time": "minimize",
"file_size": "minimize",
}
def get_problem_type(self):
return self.problem_type
def objective_function(self, configuration: dict, fidelity = None, seed = None, **kwargs):
try:
perf = self.benchmark.run(configuration)
return {obj: perf.get(obj, 1e10) for obj in self.get_objectives()}
        except Exception:
            # On benchmark failure, return a large penalty value for every objective
            return {obj: 1e10 for obj in self.get_objectives()}
if __name__ == "__main__":
    benchmark = GCCBenchmark(workload="cbench-automotive-bitcount")
    conf = {}  # empty configuration: run the benchmark with default compiler flags
    benchmark.run(conf)
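Each benchmark class above registers itself with `@problem_registry.register(...)` so the agent can look it up by name. A minimal sketch of that decorator pattern (an illustrative stand-in, not TransOPT's actual `transopt.agent.registry` implementation):

```python
# Illustrative registry sketch -- NOT the actual transopt.agent.registry code.
class ProblemRegistry:
    def __init__(self):
        self._problems = {}

    def register(self, name):
        """Return a class decorator that stores the class under `name`."""
        def decorator(cls):
            self._problems[name] = cls
            return cls
        return decorator

    def get(self, name):
        return self._problems[name]

problem_registry = ProblemRegistry()

@problem_registry.register("Compiler_GCC")
class GCCTuning:
    pass

# Later, a caller can retrieve the problem class by its registered name:
cls = problem_registry.get("Compiler_GCC")
```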
================================================
FILE: transopt/benchmark/CSSTuning/DBMS.py
================================================
import numpy as np
from csstuning.dbms.dbms_benchmark import MySQLBenchmark
from transopt.agent.registry import problem_registry
from transopt.benchmark.problem_base.non_tab_problem import NonTabularProblem
from transopt.space.fidelity_space import FidelitySpace
from transopt.space.search_space import SearchSpace
from transopt.space.variable import *
@problem_registry.register("DBMS_MySQL")
class MySQLTuning(NonTabularProblem):
problem_type = 'dbms'
workloads = MySQLBenchmark.AVAILABLE_WORKLOADS
num_variables = 197
num_objectives = 2
fidelity = None
def __init__(self, task_name, budget_type, budget, seed, workload, knobs=None, **kwargs):
self.workload = workload or MySQLBenchmark.AVAILABLE_WORKLOADS[0]
self.benchmark = MySQLBenchmark(workload=self.workload)
self.knobs = self.benchmark.get_config_space()
self.num_variables = len(self.knobs)
        super().__init__(
            task_name=task_name,
            budget=budget,
            budget_type=budget_type,
            seed=seed,
            workload=workload,
        )
np.random.seed(seed)
def get_configuration_space(self):
variables = []
for knob_name, knob_details in self.knobs.items():
knob_type = knob_details["type"]
range_ = knob_details["range"]
if knob_type == "enum":
variables.append(Categorical(knob_name, range_))
elif knob_type == "integer":
if range_[1] > np.iinfo(np.int64).max:
variables.append(ExponentialInteger(knob_name, range_))
else:
variables.append(Integer(knob_name, range_))
return SearchSpace(variables)
def get_fidelity_space(self):
return FidelitySpace([])
def get_objectives(self) -> dict:
return {
"latency": "minimize",
"throughput": "maximize",
}
def get_problem_type(self):
return self.problem_type
def objective_function(self, configuration: dict, fidelity = None, seed = None, **kwargs):
try:
perf = self.benchmark.run(configuration)
return {obj: perf.get(obj, 1e10) for obj in self.get_objectives()}
except Exception as e:
return {obj: 1e10 for obj in self.get_objectives()}
if __name__ == "__main__":
# a = DBMSTuning("1", 121, 0, 1)
pass
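The `ExponentialInteger` branch in `get_configuration_space` handles MySQL knobs whose upper bound exceeds the `int64` range (e.g. buffer sizes). The motivation: uniform sampling over such a huge range almost never proposes small values, while sampling uniformly in log-space covers every order of magnitude. A hedged sketch of log-uniform integer sampling (the actual `ExponentialInteger` implementation may differ):

```python
import numpy as np

def sample_log_integer(low, high, rng):
    # Draw the exponent uniformly, then map back and clip into [low, high].
    exponent = rng.uniform(np.log(low), np.log(high))
    return int(np.clip(round(np.exp(exponent)), low, high))

rng = np.random.default_rng(0)
samples = [sample_log_integer(1, 10**9, rng) for _ in range(1000)]
# Roughly a third of the draws land below 1000; with uniform sampling over
# [1, 1e9], essentially none would.
```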
================================================
FILE: transopt/benchmark/CSSTuning/__init__.py
================================================
from transopt.benchmark.CSSTuning.Compiler import GCCTuning
from transopt.benchmark.CSSTuning.DBMS import MySQLTuning
================================================
FILE: transopt/benchmark/DownloadBench/references
================================================
https://github.com/automl/HPOBench
https://github.com/releaunifreiburg/HPO-B
================================================
FILE: transopt/benchmark/HBOROB/algorithms.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.autograd as autograd
import torchvision  # required by the ResNet featurizer below
import copy
import numpy as np
from collections import OrderedDict
from transopt.benchmark.HPOOOD import networks
from transopt.benchmark.HPOOOD.misc import (
random_pairs_of_minibatches, split_meta_train_test, ParamDict,
MovingAverage, l2_between_dicts, proj, Nonparametric, SupConLossLambda
)
ALGORITHMS = [
'ERM',
]
def get_algorithm_class(algorithm_name):
"""Return the algorithm class with the given name."""
if algorithm_name not in globals():
raise NotImplementedError("Algorithm not found: {}".format(algorithm_name))
return globals()[algorithm_name]
class Algorithm(torch.nn.Module):
"""
A subclass of Algorithm implements a domain generalization algorithm.
Subclasses should implement the following:
- update()
- predict()
"""
def __init__(self, input_shape, num_classes, num_domains, hparams):
super(Algorithm, self).__init__()
self.hparams = hparams
def update(self, minibatches, unlabeled=None):
"""
Perform one update step, given a list of (x, y) tuples for all
environments.
Admits an optional list of unlabeled minibatches from the test domains,
when task is domain_adaptation.
"""
raise NotImplementedError
def predict(self, x):
raise NotImplementedError
class MLP(nn.Module):
"""Just an MLP"""
def __init__(self, n_inputs, n_outputs, hparams):
super(MLP, self).__init__()
self.input = nn.Linear(n_inputs, hparams['mlp_width'])
self.dropout = nn.Dropout(hparams['mlp_dropout'])
self.hiddens = nn.ModuleList([
nn.Linear(hparams['mlp_width'], hparams['mlp_width'])
for _ in range(hparams['mlp_depth']-2)])
self.output = nn.Linear(hparams['mlp_width'], n_outputs)
self.n_outputs = n_outputs
def forward(self, x):
x = self.input(x)
x = self.dropout(x)
x = F.relu(x)
for hidden in self.hiddens:
x = hidden(x)
x = self.dropout(x)
x = F.relu(x)
x = self.output(x)
return x
class ResNet(torch.nn.Module):
"""ResNet with the softmax chopped off and the batchnorm frozen"""
def __init__(self, input_shape, hparams):
super(ResNet, self).__init__()
if hparams['resnet18']:
self.network = torchvision.models.resnet18(pretrained=True)
self.n_outputs = 512
else:
self.network = torchvision.models.resnet50(pretrained=True)
self.n_outputs = 2048
# self.network = remove_batch_norm_from_resnet(self.network)
# adapt number of channels
nc = input_shape[0]
if nc != 3:
tmp = self.network.conv1.weight.data.clone()
self.network.conv1 = nn.Conv2d(
nc, 64, kernel_size=(7, 7),
stride=(2, 2), padding=(3, 3), bias=False)
for i in range(nc):
self.network.conv1.weight.data[:, i, :, :] = tmp[:, i % 3, :, :]
# save memory
del self.network.fc
        self.network.fc = nn.Identity()  # torch.nn.Identity; bare `Identity` was undefined here
self.freeze_bn()
self.hparams = hparams
self.dropout = nn.Dropout(hparams['resnet_dropout'])
def forward(self, x):
"""Encode x into a feature vector of size n_outputs."""
return self.dropout(self.network(x))
def train(self, mode=True):
"""
Override the default train() to freeze the BN parameters
"""
super().train(mode)
self.freeze_bn()
def freeze_bn(self):
for m in self.network.modules():
if isinstance(m, nn.BatchNorm2d):
m.eval()
class ERM(Algorithm):
"""
Empirical Risk Minimization (ERM)
"""
def __init__(self, input_shape, num_classes, num_domains, hparams):
super(ERM, self).__init__(input_shape, num_classes, num_domains,
hparams)
self.featurizer = networks.Featurizer(input_shape, self.hparams)
self.classifier = networks.Classifier(
self.featurizer.n_outputs,
num_classes,
self.hparams['nonlinear_classifier'])
self.network = nn.Sequential(self.featurizer, self.classifier)
self.optimizer = torch.optim.Adam(
self.network.parameters(),
lr=self.hparams["lr"],
weight_decay=self.hparams['weight_decay'],
)
def update(self, minibatches, unlabeled=None):
all_x = torch.cat([x for x, y in minibatches])
all_y = torch.cat([y for x, y in minibatches])
loss = F.cross_entropy(self.predict(all_x), all_y)
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
return {'loss': loss.item()}
def predict(self, x):
return self.network(x)
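`ERM.update` pools the `(x, y)` minibatches from every domain into one batch and takes a single cross-entropy gradient step. The same logic in a dependency-light numpy sketch, with a linear softmax model standing in for the featurizer/classifier network (illustrative only, not the torch implementation above):

```python
import numpy as np

def softmax(z):
    z = z - z.max(axis=1, keepdims=True)  # subtract row max for stability
    e = np.exp(z)
    return e / e.sum(axis=1, keepdims=True)

def erm_update(W, minibatches, lr=0.1):
    # Pool samples from all domains into one batch, exactly as ERM does.
    X = np.concatenate([x for x, _ in minibatches])
    y = np.concatenate([y for _, y in minibatches])
    probs = softmax(X @ W)
    loss = -np.log(probs[np.arange(len(y)), y]).mean()
    # Gradient of mean cross-entropy for a linear softmax model.
    grad = X.T @ (probs - np.eye(W.shape[1])[y]) / len(X)
    return W - lr * grad, loss
```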
================================================
FILE: transopt/benchmark/HBOROB/hporobust.py
================================================
# Install robustbench if you haven't already
# !pip install robustbench
from robustbench.utils import load_model
from robustbench.data import load_cifar10
from robustbench.eval import benchmark
# Step 1: Load a pre-trained robust model from RobustBench
model = load_model(model_name='Standard', dataset='cifar10', threat_model='Linf')
# Step 2: Load the CIFAR-10 test dataset
x_test, y_test = load_cifar10(n_examples=1000)
# Step 3: Evaluate the model's robustness with the AutoAttack suite.
from robustbench.utils import clean_accuracy
from autoattack import AutoAttack  # AutoAttack lives in the `autoattack` package, not robustbench.utils
# Evaluate clean accuracy
clean_acc = clean_accuracy(model, x_test, y_test)
print(f'Clean accuracy: {clean_acc * 100:.2f}%')
# Step 4: Perform adversarial evaluation using AutoAttack
adversary = AutoAttack(model, norm='Linf', eps=8/255)
adv_acc = adversary.run_standard_evaluation(x_test, y_test, bs=128)
print(f'Robust accuracy against AutoAttack: {adv_acc * 100:.2f}%')
================================================
FILE: transopt/benchmark/HBOROB/test.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
from robustbench.data import load_cifar10, load_cifar10c
from robustbench.utils import clean_accuracy, load_model
import transopt.benchmark.HPO.networks
from transopt.benchmark.HPO.algorithms import ERM
from transopt.benchmark.HPO.wide_resnet import WideResNet
# Load the CIFAR-10 test set
x_test, y_test = load_cifar10(data_dir='~/transopt_files/data/')
# Convert to tensors
x_test = torch.tensor(x_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.long)
hparams = {
'lr': 0.001,
'weight_decay': 5e-4,
'nonlinear_classifier': True
}
input_shape = (3, 32, 32)
num_classes = 10
num_domains = 1
model = ERM(input_shape, num_classes, num_domains, hparams)
from torch.utils.data import DataLoader, TensorDataset
# Build a DataLoader (this demo reuses the test split as training data)
train_loader = DataLoader(TensorDataset(x_test, y_test), batch_size=64, shuffle=True)
# Train the model
for epoch in range(10):  # train for 10 epochs
for batch in train_loader:
minibatches = [(batch[0], batch[1])]
model.update(minibatches)
print(f"Epoch {epoch + 1} completed")
corruptions = ['fog']
x_test, y_test = load_cifar10c(n_examples=1000, corruptions=corruptions, severity=5)
for model_name in ['Standard', 'Engstrom2019Robustness', 'Rice2020Overfitting',
'Carmon2019Unlabeled']:
model = load_model(model_name, dataset='cifar10', threat_model='Linf')
acc = clean_accuracy(model, x_test, y_test)
print(f'Model: {model_name}, CIFAR-10-C accuracy: {acc:.1%}')
================================================
FILE: transopt/benchmark/HPO/HPO.py
================================================
import collections
import os
import random
import time
import json
from typing import Dict, Union
from tqdm import tqdm
import numpy as np
import torch
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from transopt.benchmark.HPO import datasets
import transopt.benchmark.HPO.misc as misc
from transopt.agent.registry import problem_registry
from transopt.benchmark.HPO.fast_data_loader import (FastDataLoader,
InfiniteDataLoader)
from transopt.benchmark.problem_base.non_tab_problem import NonTabularProblem
from transopt.space.fidelity_space import FidelitySpace
from transopt.space.search_space import SearchSpace
from transopt.space.variable import *
from transopt.benchmark.HPO import algorithms
from transopt.benchmark.HPO.hparams_registry import get_hparam_space
from transopt.benchmark.HPO.networks import SUPPORTED_ARCHITECTURES
class HPO_base(NonTabularProblem):
problem_type = 'hpo'
num_variables = 4
num_objectives = 1
workloads = []
fidelity = None
ALGORITHMS = [
'ERM',
# 'BayesianNN',
# 'GLMNet'
]
ARCHITECTURES = SUPPORTED_ARCHITECTURES
DATASETS = [
"RobCifar10",
# "RobCifar100",
# "RobImageNet",
]
def __init__(
self, task_name, budget_type, budget, seed, workload, algorithm, architecture, model_size, **kwargs
):
# Check if algorithm is valid
if algorithm not in HPO_base.ALGORITHMS:
raise ValueError(f"Invalid algorithm: {algorithm}. Must be one of {HPO_base.ALGORITHMS}")
self.algorithm_name = algorithm
# Check if workload is valid
if workload < 0 or workload >= len(HPO_base.DATASETS):
raise ValueError(f"Invalid workload: {workload}. Must be between 0 and {len(HPO_base.DATASETS) - 1}")
self.dataset_name = HPO_base.DATASETS[workload]
# Check if architecture is valid
if architecture not in HPO_base.ARCHITECTURES:
raise ValueError(f"Invalid architecture: {architecture}. Must be one of {list(HPO_base.ARCHITECTURES.keys())}")
if model_size not in HPO_base.ARCHITECTURES[architecture]:
raise ValueError(f"Invalid model_size: {model_size} for architecture: {architecture}. Must be one of {HPO_base.ARCHITECTURES[architecture]}")
self.architecture = architecture
self.model_size = model_size
self.hpo_optimizer = kwargs.get('optimizer', 'random')
super(HPO_base, self).__init__(
task_name=task_name,
budget=budget,
budget_type=budget_type,
seed=seed,
workload=workload,
)
self.query_counter = kwargs.get('query_counter', 0)
self.trial_seed = seed
self.hparams = {}
base_dir = kwargs.get('base_dir', os.path.expanduser('~'))
print(base_dir)
self.data_dir = os.path.join(base_dir, 'transopt_tmp/data/')
self.model_save_dir = os.path.join(base_dir, f'transopt_tmp/output/models/{self.hpo_optimizer}_{self.algorithm_name}_{self.architecture}_{self.model_size}_{self.dataset_name}_{seed}/')
self.results_save_dir = os.path.join(base_dir, f'transopt_tmp/output/results/{self.hpo_optimizer}_{self.algorithm_name}_{self.architecture}_{self.model_size}_{self.dataset_name}_{seed}/')
print(f"Selected algorithm: {self.algorithm_name}, dataset: {self.dataset_name}")
print(f"Model architecture: {self.architecture}")
if hasattr(self, 'model_size'):
print(f"Model size: {self.model_size}")
else:
print("Model size not specified")
os.makedirs(self.model_save_dir, exist_ok=True)
os.makedirs(self.results_save_dir, exist_ok=True)
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
# Get the GPU ID from hparams, default to 0 if not specified
gpu_id = kwargs.get('gpu_id', 0)
if torch.cuda.is_available():
# Check if the specified GPU exists
if gpu_id < torch.cuda.device_count():
self.device = torch.device(f"cuda:{gpu_id}")
else:
print(f"Warning: GPU {gpu_id} not found. Defaulting to CPU.")
self.device = torch.device("cpu")
else:
self.device = torch.device("cpu")
        # Record the device actually used in hparams
        self.hparams['device'] = str(self.device)
        print(f"Using device: {self.device}")
if self.dataset_name in vars(datasets):
self.dataset = vars(datasets)[self.dataset_name](root=self.data_dir, augment=kwargs.get('augment', None))
else:
raise NotImplementedError
        if kwargs.get('augment', None) == 'mixup':  # self.hparams is still empty at this point
self.mixup = True
else:
self.mixup = False
print(f"Using augment: {kwargs.get('augment', None)}")
self.eval_loaders, self.eval_loader_names = self.create_test_loaders(128)
self.checkpoint_vals = collections.defaultdict(lambda: [])
def create_train_loaders(self, batch_size):
if not hasattr(self, 'dataset') or self.dataset is None:
raise ValueError("Dataset not initialized. Please ensure self.dataset is set before calling this method.")
train_loaders = FastDataLoader(
dataset=self.dataset.datasets['train'],
batch_size=batch_size,
num_workers=2) # Assuming N_WORKERS is 2, adjust if needed
val_loaders = FastDataLoader(
dataset=self.dataset.datasets['val'],
batch_size=batch_size,
num_workers=2) # Assuming N_WORKERS is 2, adjust if needed
return train_loaders, val_loaders
def create_test_loaders(self, batch_size):
if not hasattr(self, 'dataset') or self.dataset is None:
raise ValueError("Dataset not initialized. Please ensure self.dataset is set before calling this method.")
eval_loaders = []
eval_loader_names = []
# Get all available test set names
available_test_sets = self.dataset.get_available_test_set_names()
for test_set_name in available_test_sets:
if test_set_name.startswith('test_'):
eval_loaders.append(FastDataLoader(
dataset=self.dataset.datasets[test_set_name],
batch_size=batch_size,
num_workers=2)) # Assuming N_WORKERS is 2, adjust if needed
eval_loader_names.append(test_set_name)
return eval_loaders, eval_loader_names
def save_checkpoint(self, filename):
save_dict = {
"model_input_shape": self.dataset.input_shape,
"model_num_classes": self.dataset.num_classes,
"model_hparams": self.hparams,
"model_dict": self.algorithm.state_dict()
}
torch.save(save_dict, os.path.join(self.model_save_dir, filename))
def get_configuration_space(
self, seed: Union[int, None] = None):
hparam_space = get_hparam_space(self.algorithm_name, self.model_size, self.architecture)
variables = []
        for name, (hparam_type, value_range) in hparam_space.items():
            if hparam_type == 'categorical':
                variables.append(Categorical(name, value_range))
            elif hparam_type == 'float':
                variables.append(Continuous(name, value_range))
            elif hparam_type == 'int':
                variables.append(Integer(name, value_range))
            elif hparam_type == 'log':
                variables.append(LogContinuous(name, value_range))
ss = SearchSpace(variables)
return ss
def get_fidelity_space(
self, seed: Union[int, None] = None):
fs = FidelitySpace([
Integer("epoch", [1, 100]) # Adjust the range as needed
])
return fs
def train(self, configuration: dict):
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
self.epoches = configuration['epoch']
print(f"Total epochs: {self.epoches}")
self.train_loader, self.val_loader = self.create_train_loaders(self.hparams['batch_size'])
self.hparams['nonlinear_classifier'] = True
for epoch in range(self.epoches):
epoch_start_time = time.time()
epoch_loss = 0.0
epoch_correct = 0
epoch_total = 0
self.algorithm.train()
total_batches = len(self.train_loader)
for x, y in tqdm(self.train_loader, total=total_batches, desc=f"Epoch {epoch+1}/{self.epoches}", unit="batch"):
step_start_time = time.time()
minibatches_device = [(x.to(self.device), y.to(self.device))]
step_vals = self.algorithm.update(minibatches_device)
self.checkpoint_vals['step_time'].append(time.time() - step_start_time)
for key, val in step_vals.items():
self.checkpoint_vals[key].append(val)
# Update epoch statistics
epoch_loss += step_vals.get('loss', 0.0)
epoch_correct += step_vals.get('correct', 0)
epoch_total += sum(len(x) for x, _ in minibatches_device)
# Compute and print epoch metrics
epoch_acc = epoch_correct / epoch_total if epoch_total > 0 else 0
epoch_loss /= len(self.train_loader)
print(f"Epoch {epoch+1}/{self.epoches} - Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}")
# Evaluate on validation set
val_acc = self.evaluate_loader(self.val_loader)
# Calculate final results after all epochs
results = {
'epoch': self.epoches,
'epoch_time': time.time() - epoch_start_time,
'train_loss': epoch_loss,
'train_acc': epoch_acc,
'val_acc': val_acc,
}
# Evaluate on all test loaders
for name, loader in zip(self.eval_loader_names, self.eval_loaders):
results[f'{name}_acc'] = self.evaluate_loader(loader)
# Calculate memory usage
results['mem_gb'] = torch.cuda.max_memory_allocated() / (1024.**3)
results['hparams'] = self.hparams
return results
def save_epoch_results(self, results):
epoch_path = os.path.join(self.results_save_dir, f"epoch_{results['epoch']}.json")
with open(epoch_path, 'w') as f:
json.dump(results, f, indent=2)
def evaluate_loader(self, loader):
self.algorithm.eval()
correct = total = 0
with torch.no_grad():
for x, y in loader:
x, y = x.to(self.device), y.to(self.device)
p = self.algorithm.predict(x)
correct += (p.argmax(1).eq(y) if p.size(1) != 1 else p.gt(0).eq(y)).float().sum().item()
total += len(x)
self.algorithm.train()
return correct / total
def get_score(self, configuration: dict):
for key, value in configuration.items():
self.hparams[key] = value
algorithm_class = algorithms.get_algorithm_class(self.algorithm_name)
self.algorithm = algorithm_class(self.dataset.input_shape, self.dataset.num_classes, self.architecture, self.model_size, self.mixup, self.device, self.hparams)
self.algorithm.to(self.device)
self.query_counter += 1
results = self.train(configuration)
# Construct filename with query and all hyperparameters
filename_parts = [f"{self.query_counter}"]
for key, value in configuration.items():
filename_parts.append(f"{key}_{value}")
filename = "_".join(filename_parts)
        # Save results under the filename constructed above
        epochs_path = os.path.join(self.results_save_dir, f"{filename}.jsonl")
        with open(epochs_path, 'w') as f:
            json.dump(results, f, indent=2)
        # Save final checkpoint and mark as done
        self.save_checkpoint(f"{filename}_model.pkl")
with open(os.path.join(self.model_save_dir, 'done'), 'w') as f:
f.write('done')
val_acc = results['val_acc']
return val_acc, results
def objective_function(
self,
configuration,
fidelity = None,
seed = None,
**kwargs
) -> Dict:
if fidelity is None:
fidelity = {"epoch": 50}
# Convert log scale values back to normal scale
c = self.configuration_space.map_to_design_space(configuration)
# Add fidelity (epoch) to the configuration
c["epoch"] = fidelity["epoch"]
c['class_balanced'] = True
c['nonlinear_classifier'] = True
val_acc, results = self.get_score(c)
acc = {list(self.objective_info.keys())[0]: float(val_acc)}
# Add standard test accuracy
acc['test_standard_acc'] = float(results['test_standard_acc'])
# Calculate average of other test accuracies
other_test_accs = [v for k, v in results.items() if k.startswith('test_') and k != 'test_standard_acc']
if other_test_accs:
acc['test_robust_acc'] = float(sum(other_test_accs) / len(other_test_accs))
return acc
def get_objectives(self) -> Dict:
return {'function_value': 'minimize'}
def get_problem_type(self):
return "hpo"
@problem_registry.register("HPO_ERM")
class HPO_ERM(HPO_base):
def __init__(
self, task_name, budget_type, budget, seed, workload, **kwargs
):
algorithm = kwargs.pop('algorithm', 'ERM')
architecture = kwargs.pop('architecture', 'resnet')
model_size = kwargs.pop('model_size', 18)
optimizer = kwargs.pop('optimizer', 'random')
base_dir = kwargs.pop('base_dir', os.path.expanduser('~'))
super(HPO_ERM, self).__init__(
task_name=task_name,
budget_type=budget_type,
budget=budget,
seed=seed,
workload=workload,
algorithm=algorithm,
architecture=architecture,
model_size=model_size,
optimizer=optimizer,
base_dir=base_dir,
**kwargs
)
def test_all_combinations():
print("Testing all combinations of architectures, algorithms, and datasets...")
for architecture in HPO_base.ARCHITECTURES:
for model_size in HPO_base.ARCHITECTURES[architecture]:
for algorithm in HPO_base.ALGORITHMS:
for dataset_index, dataset in enumerate(HPO_base.DATASETS):
print(f"Testing {architecture}-{model_size} with {algorithm} on {dataset}...")
try:
# Create an instance of HPO_base
hpo = HPO_base(task_name='test_combination',
budget_type='FEs', budget=100, seed=0,
workload=dataset_index, algorithm=algorithm,
architecture=architecture, model_size=model_size, optimizer='test_combination')
# Get the configuration space
config_space = hpo.get_configuration_space()
# Get the fidelity space
fidelity_space = hpo.get_fidelity_space()
# Sample a random configuration
config = {}
for name, var in config_space.get_design_variables().items():
if isinstance(var, Integer):
config[name] = np.random.randint(var.search_space_range[0], var.search_space_range[1] + 1)
elif isinstance(var, Continuous) or isinstance(var, LogContinuous):
config[name] = np.random.uniform(var.search_space_range[0], var.search_space_range[1])
elif isinstance(var, Categorical):
config[name] = np.random.choice(var.search_space_range)
# Sample a random fidelity
fidelity = {}
for name, var in fidelity_space.get_fidelity_range().items():
if isinstance(var, Integer):
fidelity[name] = np.random.randint(var.search_space_range[0], var.search_space_range[1] + 1)
elif isinstance(var, Continuous):
fidelity[name] = np.random.uniform(var.search_space_range[0], var.search_space_range[1])
elif isinstance(var, Categorical):
fidelity[name] = np.random.choice(var.search_space_range)
# Set a small epoch for quick testing
fidelity['epoch'] = 2
# Run the objective function
result = hpo.objective_function(configuration=config, fidelity=fidelity)
print(f"Configuration: {config}")
print(f"Fidelity: {fidelity}")
print(f"Result: {result}")
assert list(hpo.get_objectives().keys())[0] in result, f"Result should contain '{list(hpo.get_objectives().keys())[0]}'"
assert 0 <= result[list(hpo.get_objectives().keys())[0]] <= 1, f"{list(hpo.get_objectives().keys())[0]} should be between 0 and 1"
print(f"Test passed for {architecture}-{model_size} with {algorithm} on {dataset}!")
print("--------------------")
except Exception as e:
print(f"Error occurred during test for {architecture}-{model_size} with {algorithm} on {dataset}: {str(e)}")
import traceback
traceback.print_exc()
print("--------------------")
if __name__ == "__main__":
import torch
import numpy as np
# Set random seed for reproducibility
np.random.seed(0)
torch.manual_seed(0)
# Run the comprehensive test
try:
test_all_combinations()
except Exception as e:
print(f"Error occurred during HPO_ERM test: {str(e)}")
import traceback
traceback.print_exc()
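`objective_function` calls `self.configuration_space.map_to_design_space(configuration)` to convert normalized search-space values back to design values; for hyperparameters declared with the `'log'` type (such as the learning rate), this plausibly means an exponential mapping of a normalized coordinate. A hypothetical sketch of that mapping, not the actual `SearchSpace` code:

```python
import math

def map_log(u, low, high):
    """Map u in [0, 1] to [low, high] uniformly in log-space."""
    return math.exp(math.log(low) + u * (math.log(high) - math.log(low)))

lr = map_log(0.5, 1e-5, 1e-1)  # geometric midpoint of the range: 1e-3
```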
================================================
FILE: transopt/benchmark/HPO/HPOAdaBoost.py
================================================
import os
import time
import logging
import torch
import numpy as np
import xgboost as xgb
from typing import Union, Tuple, Dict, List
from sklearn import pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, make_scorer
from sklearn.preprocessing import OneHotEncoder
from transopt.utils.openml_data_manager import OpenMLHoldoutDataManager
from transopt.space.variable import *
from transopt.agent.registry import problem_registry
from transopt.benchmark.problem_base.non_tab_problem import NonTabularProblem
from transopt.space.search_space import SearchSpace
from transopt.space.fidelity_space import FidelitySpace
from transopt.optimizer.sampler.random import RandomSampler
os.environ['OMP_NUM_THREADS'] = "1"
logger = logging.getLogger('XGBBenchmark')
@problem_registry.register('AdaBoost')
class XGBoostBenchmark(NonTabularProblem):
task_lists = [167149, 167152, 126029, 167178, 167177, 167153, 167154, 167155, 167156]
problem_type = 'hpo'
num_variables = 10
num_objectives = 1
workloads = []
fidelity = None
def __init__(
self, task_name, budget_type, budget, seed, workload, **kwargs
):
"""
Parameters
----------
task_id : int, None
n_threads : int, None
seed : np.random.RandomState, int, None
"""
super(XGBoostBenchmark, self).__init__(
task_name=task_name,
budget=budget,
budget_type=budget_type,
seed=seed,
workload=workload,
)
self.task_id = XGBoostBenchmark.task_lists[workload]
if torch.cuda.is_available():
self.device = torch.device('cuda')
else:
self.device = torch.device('cpu')
self.n_threads = 1
self.budget = budget
self.accuracy_scorer = make_scorer(accuracy_score)
self.x_train, self.y_train, self.x_valid, self.y_valid, self.x_test, self.y_test, variable_types = \
self.get_data()
self.categorical_data = np.array([var_type == 'categorical' for var_type in variable_types])
# XGB needs sorted data. Data should be (Categorical + numerical) not mixed.
categorical_idx = np.argwhere(self.categorical_data)
continuous_idx = np.argwhere(~self.categorical_data)
sorting = np.concatenate([categorical_idx, continuous_idx]).squeeze()
self.categorical_data = self.categorical_data[sorting]
self.x_train = self.x_train[:, sorting]
self.x_valid = self.x_valid[:, sorting]
self.x_test = self.x_test[:, sorting]
nan_columns = np.all(np.isnan(self.x_train), axis=0)
self.categorical_data = self.categorical_data[~nan_columns]
self.x_train, self.x_valid, self.x_test, self.categories = \
OpenMLHoldoutDataManager.replace_nans_in_cat_columns(self.x_train, self.x_valid, self.x_test,
is_categorical=self.categorical_data)
# Determine the number of categories in the labels.
# In case of binary classification ``self.num_class`` has to be 1 for xgboost.
self.num_class = len(np.unique(np.concatenate([self.y_train, self.y_test, self.y_valid])))
self.num_class = 1 if self.num_class == 2 else self.num_class
self.train_idx = np.random.choice(a=np.arange(len(self.x_train)),
size=len(self.x_train),
replace=False)
# Similar to [Fast Bayesian Optimization of Machine Learning Hyperparameters on Large Datasets]
# (https://arxiv.org/pdf/1605.07079.pdf),
# use 10 time the number of classes as lower bound for the dataset fraction
n_classes = np.unique(self.y_train).shape[0]
self.lower_bound_train_size = (10 * n_classes) / self.x_train.shape[0]
def get_data(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, List]:
""" Loads the data given a task or another source. """
        assert self.task_id is not None, ('No task-id given. Please either specify a task-id or '
                                          'override the get_data method.')
data_manager = OpenMLHoldoutDataManager(openml_task_id=self.task_id, rng=self.seed)
x_train, y_train, x_val, y_val, x_test, y_test = data_manager.load()
return x_train, y_train, x_val, y_val, x_test, y_test, data_manager.variable_types
    def shuffle_data(self, seed=None):
        """Reshuffle the training indices. Accepts an int seed or a
        ``np.random.RandomState``; with ``None``, a fresh random state is used."""
        random_state = seed if isinstance(seed, np.random.RandomState) else np.random.RandomState(seed)
        random_state.shuffle(self.train_idx)
# pylint: disable=arguments-differ
def objective_function(
self,
configuration: Dict,
fidelity: Dict = None,
seed: Union[np.random.RandomState, int, None] = None,
**kwargs,
) -> Dict:
"""
Trains a XGBoost model given a hyperparameter configuration and
evaluates the model on the validation set.
Parameters
----------
configuration : Dict, CS.Configuration
Configuration for the XGBoost model
fidelity: Dict, None
Fidelity parameters for the XGBoost model, check get_fidelity_space(). Uses default (max) value if None.
shuffle : bool
If ``True``, shuffle the training idx. If no parameter ``rng`` is given, use the class random state.
Defaults to ``False``.
rng : np.random.RandomState, int, None,
Random seed for benchmark. By default the class level random seed.
To prevent overfitting on a single seed, it is possible to pass a
parameter ``rng`` as 'int' or 'np.random.RandomState' to this function.
If this parameter is not given, the default random state is used.
kwargs
Returns
-------
Dict -
function_value : validation loss
cost : time to train and evaluate the model
info : Dict
train_loss : trainings loss
fidelity : used fidelities in this evaluation
"""
self.seed = seed
# if shuffle:
# self.shuffle_data(self.seed)
start = time.time()
model = self._get_pipeline(**configuration)
model.fit(X=self.x_train, y=self.y_train)
train_loss = 1 - self.accuracy_scorer(model, self.x_train, self.y_train)
val_loss = 1 - self.accuracy_scorer(model, self.x_valid, self.y_valid)
cost = time.time() - start
# return {'function_value': float(val_loss),
# 'cost': cost,
# 'info': {'train_loss': float(train_loss),
# 'fidelity': fidelity}
# }
results = {list(self.objective_info.keys())[0]: float(val_loss)}
for fd_name in self.fidelity_space.fidelity_names:
results[fd_name] = fidelity[fd_name]
return results
# pylint: disable=arguments-differ
def objective_function_test(self, configuration: Union[Dict],
fidelity: Union[Dict, None] = None,
shuffle: bool = False,
seed: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict:
"""
Trains a XGBoost model with a given configuration on both the train
and validation data set and evaluates the model on the test data set.
Parameters
----------
configuration : Dict, CS.Configuration
Configuration for the XGBoost Model
fidelity: Dict, None
Fidelity parameters, check get_fidelity_space(). Uses default (max) value if None.
shuffle : bool
If ``True``, shuffle the training indices. If no ``seed`` is given, use the class random state.
Defaults to ``False``.
seed : np.random.RandomState, int, None
Random seed for the benchmark. By default the class-level random seed is used.
To prevent overfitting on a single seed, an ``int`` or ``np.random.RandomState``
can be passed explicitly.
kwargs
Returns
-------
Dict -
function_value : test loss
cost : time to train and evaluate the model
info : Dict
fidelity : used fidelities in this evaluation
"""
default_dataset_fraction = self.get_fidelity_space().get_hyperparameter('dataset_fraction').default_value
if fidelity['dataset_fraction'] != default_dataset_fraction:
raise NotImplementedError(f'Test error can only be computed for the default '
f'dataset_fraction ({default_dataset_fraction})')
self.seed = seed
if shuffle:
self.shuffle_data(self.seed)
start = time.time()
# Concatenate train and validation data; nan values are imputed with the feature mean inside the pipeline
data = np.concatenate((self.x_train, self.x_valid))
targets = np.concatenate((self.y_train, self.y_valid))
model = self._get_pipeline(**configuration)
model.fit(X=data, y=targets)
test_loss = 1 - self.accuracy_scorer(model, self.x_test, self.y_test)
cost = time.time() - start
return {'function_value': float(test_loss),
'cost': cost,
'info': {'fidelity': fidelity}}
def get_configuration_space(self, seed: Union[int, None] = None):
"""
Creates a ConfigSpace.ConfigurationSpace containing all parameters for
the XGBoost Model
Parameters
----------
seed : int, None
Fixing the seed for the ConfigSpace.ConfigurationSpace
Returns
-------
ConfigSpace.ConfigurationSpace
"""
variables=[Continuous('eta', [-10.0, 0.0]),
Integer('max_depth', [1, 15]),
Continuous('min_child_weight', [0.0, 7.0]),
Continuous('colsample_bytree', [0.01, 1.0]),
Continuous('colsample_bylevel', [0.01, 1.0]),
Continuous('reg_lambda', [-10.0, 10.0]),
Continuous('reg_alpha', [-10.0, 10.0]),
Continuous('subsample_per_it', [0.1, 1.0]),
Integer('n_estimators', [1, 50]),
Continuous('gamma', [0.0, 1.0])]
ss = SearchSpace(variables)
return ss
def get_fidelity_space(self, seed: Union[int, None] = None):
"""
Creates a ConfigSpace.ConfigurationSpace containing all fidelity parameters for
the XGBoost Benchmark
Parameters
----------
seed : int, None
Fixing the seed for the ConfigSpace.ConfigurationSpace
Returns
-------
ConfigSpace.ConfigurationSpace
"""
# seed = seed if seed is not None else np.random.randint(1, 100000)
# fidel_space = CS.ConfigurationSpace(seed=seed)
# fidel_space.add_hyperparameters([
# CS.UniformFloatHyperparameter("dataset_fraction", lower=0.0, upper=1.0, default_value=1.0, log=False),
# CS.UniformIntegerHyperparameter("n_estimators", lower=1, upper=256, default_value=256, log=False)
# ])
# return fidel_space
fs = FidelitySpace([])
return fs
def get_meta_information(self) -> Dict:
""" Returns the meta information for the benchmark """
return {'name': 'XGBoost',
'references': ['@article{probst2019tunability,'
'title={Tunability: Importance of hyperparameters of machine learning algorithms.},'
'author={Probst, Philipp and Boulesteix, Anne-Laure and Bischl, Bernd},'
'journal={J. Mach. Learn. Res.},'
'volume={20},'
'number={53},'
'pages={1--32},'
'year={2019}'
'}'],
'code': 'https://github.com/automl/HPOlib1.5/blob/development/hpolib/benchmarks/ml/'
'xgboost_benchmark_old.py',
'shape of train data': self.x_train.shape,
'shape of test data': self.x_test.shape,
'shape of valid data': self.x_valid.shape,
'initial random seed': self.seed,
'task_id': self.task_id
}
def _get_pipeline(self, max_depth: int, eta: float, min_child_weight: int,
colsample_bytree: float, colsample_bylevel: float, reg_lambda: int, reg_alpha: int,
n_estimators: int, subsample_per_it: float, gamma: float) \
-> pipeline.Pipeline:
""" Create the scikit-learn (training-)pipeline """
objective = 'binary:logistic' if self.num_class <= 2 else 'multi:softmax'
if torch.cuda.is_available():
clf = pipeline.Pipeline([
('preprocess_impute',
ColumnTransformer([
("categorical", "passthrough", self.categorical_data),
("continuous", SimpleImputer(strategy="mean"), ~self.categorical_data)])),
('preprocess_one_hot',
ColumnTransformer([
("categorical", OneHotEncoder(categories=self.categories, sparse=False), self.categorical_data),
("continuous", "passthrough", ~self.categorical_data)])),
('xgb',
xgb.XGBClassifier(
max_depth=max_depth,
learning_rate=np.exp2(eta),
min_child_weight=np.exp2(min_child_weight),
colsample_bytree=colsample_bytree,
colsample_bylevel=colsample_bylevel,
reg_alpha=np.exp2(reg_alpha),
reg_lambda=np.exp2(reg_lambda),
n_estimators=n_estimators,
objective=objective,
n_jobs=self.n_threads,
random_state=self.seed,
num_class=self.num_class,
subsample=subsample_per_it,
gamma=gamma,
tree_method='gpu_hist',
gpu_id=0
))
])
else:
clf = pipeline.Pipeline([
('preprocess_impute',
ColumnTransformer([
("categorical", "passthrough", self.categorical_data),
("continuous", SimpleImputer(strategy="mean"), ~self.categorical_data)])),
('preprocess_one_hot',
ColumnTransformer([
("categorical", OneHotEncoder(categories=self.categories), self.categorical_data),
("continuous", "passthrough", ~self.categorical_data)])),
('xgb',
xgb.XGBClassifier(
max_depth=max_depth,
learning_rate=np.exp2(eta),
min_child_weight=np.exp2(min_child_weight),
colsample_bytree=colsample_bytree,
colsample_bylevel=colsample_bylevel,
reg_alpha=np.exp2(reg_alpha),
reg_lambda=np.exp2(reg_lambda),
n_estimators=n_estimators,
objective=objective,
n_jobs=self.n_threads,
random_state=self.seed,
num_class=self.num_class,
subsample=subsample_per_it,
gamma=gamma,
))
])
return clf
def get_objectives(self) -> Dict:
return {'train_loss': 'minimize'}
def get_problem_type(self):
return "hpo"
# def get_var_range(self):
# return {'eta':[-10,0], 'max_depth':[1, 15], 'min_child_weight':[0, 7], 'colsample_bytree':[0.01, 1.0], 'colsample_bylevel':[0.01, 1.0],
# 'reg_lambda':[-10, 10], 'reg_alpha':[-10, 10], 'subsample_per_it':[0.1, 1.0], 'n_estimators':[1, 50], 'gamma':[0,1.0]}
#
#
# def get_var_type(self):
# return {'eta':'exp2', 'max_depth':'int', 'min_child_weight':'exp2', 'colsample_bytree':'float','colsample_bylevel':'float',
# 'reg_lambda':'exp2', 'reg_alpha':'exp2', 'subsample_per_it':'float', 'n_estimators':'int', 'gamma':'float'}
if __name__ == '__main__':
task_lists = [167149, 167152, 126029, 167178, 167177, 167153, 167154, 167155, 167156]
workload = 8
problem = XGBoostBenchmark(task_name='XGB', budget=20, budget_type = 'fes', workload=workload, seed = 0)
sampler = RandomSampler(3000, config=None)
space = problem.configuration_space
samples = sampler.sample(space, 3000)
parameters = [space.map_to_design_space(sample) for sample in samples]
import tqdm
for para_id in tqdm.tqdm(range(len(parameters))):
parameters[para_id]['score'] = problem.f(parameters[para_id])['train_loss']
import pandas as pd
df = pd.DataFrame(parameters)
df.to_csv(f'XGB_{workload}.csv')
# a = problem.f({'eta':-0.2, 'max_depth':5, 'min_child_weight':2, 'colsample_bytree':0.4, 'colsample_bylevel':0.4,
# 'reg_lambda':0.5, 'reg_alpha':-0.2, 'subsample_per_it':0.7, 'n_estimators':20, 'gamma':0.9})
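The `__init__` of this benchmark reorders feature columns so that categorical columns precede continuous ones before imputation and one-hot encoding. A minimal, self-contained NumPy sketch of that reordering (the helper name `sort_columns` is illustrative, not part of the benchmark API):

```python
import numpy as np

def sort_columns(x, is_categorical):
    """Reorder columns so categorical features come first, then continuous ones."""
    categorical_idx = np.argwhere(is_categorical)
    continuous_idx = np.argwhere(~is_categorical)
    sorting = np.concatenate([categorical_idx, continuous_idx]).squeeze()
    return x[:, sorting], is_categorical[sorting]

x = np.arange(6).reshape(2, 3)           # columns 0, 1, 2
mask = np.array([False, True, False])    # only column 1 is categorical
x_sorted, mask_sorted = sort_columns(x, mask)
# column order becomes [1, 0, 2]; mask becomes [True, False, False]
```

This keeps the `ColumnTransformer` boolean masks aligned with the reordered data.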
================================================
FILE: transopt/benchmark/HPO/HPOSVM.py
================================================
import logging
import time
import numpy as np
from scipy import sparse
from typing import Union, Tuple, Dict, List
from sklearn import pipeline
from sklearn import svm
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, make_scorer
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from transopt.utils.openml_data_manager import OpenMLHoldoutDataManager
from transopt.space.variable import *
from transopt.agent.registry import problem_registry
from transopt.benchmark.problem_base.non_tab_problem import NonTabularProblem
from transopt.space.search_space import SearchSpace
from transopt.space.fidelity_space import FidelitySpace
logger = logging.getLogger('SVMBenchmark')
@problem_registry.register('SVM')
class SupportVectorMachine(NonTabularProblem):
"""
Hyperparameter optimization task to optimize the regularization
parameter C and the kernel parameter gamma of a support vector machine.
Both hyperparameters are optimized on a log scale in [-10, 10].
The test data set is only used for a final offline evaluation of
a configuration. For that, the validation and training data are
concatenated to form the whole training data set.
"""
task_lists = [167149, 167152, 167183, 126025, 126029, 167161, 167169,
167178, 167176, 167177]
problem_type = 'hpo'
num_variables = 2
num_objectives = 1
workloads = []
fidelity = None
def __init__(
self, task_name, budget_type, budget, seed, workload, **kwargs
):
"""
Parameters
----------
task_id : int, None
rng : np.random.RandomState, int, None
"""
super(SupportVectorMachine, self).__init__(
task_name=task_name,
budget=budget,
budget_type=budget_type,
seed=seed,
workload=workload,
)
task_type='non-tabular'
self.task_id = SupportVectorMachine.task_lists[workload]
self.cache_size = 200 # Cache for the SVC in MB
self.accuracy_scorer = make_scorer(accuracy_score)
self.x_train, self.y_train, self.x_valid, self.y_valid, self.x_test, self.y_test, variable_types = \
self.get_data()
self.categorical_data = np.array([var_type == 'categorical' for var_type in variable_types])
# Sort columns (categorical first, then numerical) so that categorical and continuous features are not mixed.
categorical_idx = np.argwhere(self.categorical_data)
continuous_idx = np.argwhere(~self.categorical_data)
sorting = np.concatenate([categorical_idx, continuous_idx]).squeeze()
self.categorical_data = self.categorical_data[sorting]
self.x_train = self.x_train[:, sorting]
self.x_valid = self.x_valid[:, sorting]
self.x_test = self.x_test[:, sorting]
nan_columns = np.all(np.isnan(self.x_train), axis=0)
self.categorical_data = self.categorical_data[~nan_columns]
self.x_train, self.x_valid, self.x_test, self.categories = \
OpenMLHoldoutDataManager.replace_nans_in_cat_columns(self.x_train, self.x_valid, self.x_test,
is_categorical=self.categorical_data)
self.train_idx = np.random.choice(a=np.arange(len(self.x_train)),
size=len(self.x_train),
replace=False)
# Similar to [Fast Bayesian Optimization of Machine Learning Hyperparameters on Large Datasets]
# (https://arxiv.org/pdf/1605.07079.pdf),
# use 10 times the number of classes as a lower bound for the dataset fraction
n_classes = np.unique(self.y_train).shape[0]
self.lower_bound_train_size = (10 * n_classes) / self.x_train.shape[0]
def get_data(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, List]:
""" Loads the data given a task or another source. """
assert self.task_id is not None, NotImplementedError('No task-id given. Please either specify a task-id or '
'overwrite the get_data Method.')
data_manager = OpenMLHoldoutDataManager(openml_task_id=self.task_id, rng=self.seed)
x_train, y_train, x_val, y_val, x_test, y_test = data_manager.load()
return x_train, y_train, x_val, y_val, x_test, y_test, data_manager.variable_types
def shuffle_data(self, seed=None):
""" Reshuffle the training indices. If ``seed`` is None, the class-level random state is used. """
random_state = seed if isinstance(seed, np.random.RandomState) else np.random.RandomState(seed)
random_state.shuffle(self.train_idx)
# pylint: disable=arguments-differ
def objective_function(
self,
configuration: Dict,
fidelity: Dict = None,
seed: Union[np.random.RandomState, int, None] = None,
**kwargs,
) -> Dict:
"""
Trains a SVM model given a hyperparameter configuration and
evaluates the model on the validation set.
Parameters
----------
configuration : Dict, CS.Configuration
Configuration for the SVM model
fidelity: Dict, None
Fidelity parameters for the SVM model, check get_fidelity_space(). Uses default (max) value if None.
seed : np.random.RandomState, int, None
Random seed for the benchmark. By default the class-level random seed is used.
To prevent overfitting on a single seed, an ``int`` or ``np.random.RandomState``
can be passed explicitly.
kwargs
Returns
-------
Dict -
function_value : validation loss
cost : time to train and evaluate the model
info : Dict
train_loss : training loss
fidelity : used fidelities in this evaluation
"""
start_time = time.time()
self.seed = seed
# if shuffle:
# self.shuffle_data(self.seed)
# Transform hyperparameters to linear scale
hp_c = np.exp(float(configuration['C']))
hp_gamma = np.exp(float(configuration['gamma']))
# Train support vector machine
model = self.get_pipeline(hp_c, hp_gamma)
model.fit(self.x_train, self.y_train)
# Compute validation error
train_loss = 1 - self.accuracy_scorer(model, self.x_train, self.y_train)
val_loss = 1 - self.accuracy_scorer(model, self.x_valid, self.y_valid)
cost = time.time() - start_time
# return {'function_value': float(val_loss),
# "cost": cost,
# 'info': {'train_loss': float(train_loss),
# 'fidelity': fidelity}}
results = {list(self.objective_info.keys())[0]: float(val_loss)}
for fd_name in self.fidelity_space.fidelity_names:
results[fd_name] = fidelity[fd_name]
return results
# pylint: disable=arguments-differ
def objective_function_test(self, configuration: Union[Dict],
fidelity: Union[Dict, None] = None,
shuffle: bool = False,
seed: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict:
"""
Trains a SVM model with a given configuration on both the X_train
and validation data set and evaluates the model on the X_test data set.
Parameters
----------
configuration : Dict, CS.Configuration
Configuration for the SVM Model
fidelity: Dict, None
Fidelity parameters, check get_fidelity_space(). Uses default (max) value if None.
shuffle : bool
If ``True``, shuffle the training indices. If no ``seed`` is given, use the class random state.
Defaults to ``False``.
seed : np.random.RandomState, int, None
Random seed for the benchmark. By default the class-level random seed is used.
To prevent overfitting on a single seed, an ``int`` or ``np.random.RandomState``
can be passed explicitly.
kwargs
Returns
-------
Dict -
function_value : test loss
cost : time to train and evaluate the model
info : Dict
train_valid_loss: Loss on the train+valid data set
fidelity : used fidelities in this evaluation
"""
self.seed = seed
if shuffle:
self.shuffle_data(self.seed)
start_time = time.time()
# Concatenate training and validation dataset
if isinstance(self.x_train, sparse.csr.csr_matrix) or isinstance(self.x_valid, sparse.csr.csr_matrix):
data = sparse.vstack((self.x_train, self.x_valid))
else:
data = np.concatenate((self.x_train, self.x_valid))
targets = np.concatenate((self.y_train, self.y_valid))
# Transform hyperparameters to linear scale
hp_c = np.exp(float(configuration['C']))
hp_gamma = np.exp(float(configuration['gamma']))
model = self.get_pipeline(hp_c, hp_gamma)
model.fit(data, targets)
# Compute validation error
train_valid_loss = 1 - self.accuracy_scorer(model, data, targets)
# Compute test error
test_loss = 1 - self.accuracy_scorer(model, self.x_test, self.y_test)
cost = time.time() - start_time
# return {'function_value': float(test_loss),
# "cost": cost,
# 'info': {'train_valid_loss': float(train_valid_loss),
# 'fidelity': fidelity}}
results = {list(self.objective_info.keys())[0]: float(test_loss)}
for fd_name in self.fidelity_space.fidelity_names:
results[fd_name] = fidelity[fd_name]
return results
def get_pipeline(self, C: float, gamma: float) -> pipeline.Pipeline:
""" Create the scikit-learn (training-)pipeline """
model = pipeline.Pipeline([
('preprocess_impute',
ColumnTransformer([
("categorical", "passthrough", self.categorical_data),
("continuous", SimpleImputer(strategy="mean"), ~self.categorical_data)])),
('preprocess_one_hot',
ColumnTransformer([
("categorical", OneHotEncoder(categories=self.categories), self.categorical_data),
("continuous", MinMaxScaler(feature_range=(0, 1)), ~self.categorical_data)])),
('svm',
svm.SVC(gamma=gamma, C=C, random_state=self.seed, cache_size=self.cache_size))
])
return model
def get_configuration_space(self, seed: Union[int, None] = None):
"""
Creates a ConfigSpace.ConfigurationSpace containing all parameters for
the SVM Model
For a detailed explanation of the hyperparameters:
https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html
Parameters
----------
seed : int, None
Fixing the seed for the ConfigSpace.ConfigurationSpace
Returns
-------
ConfigSpace.ConfigurationSpace
"""
variables=[Continuous('C', [-10, 10]), Continuous('gamma', [-10, 10])]
ss = SearchSpace(variables)
return ss
def get_fidelity_space(self, seed: Union[int, None] = None):
"""
Creates a ConfigSpace.ConfigurationSpace containing all fidelity parameters for
the SupportVector Benchmark
Fidelities
----------
dataset_fraction: float - [0.1, 1]
fraction of training data set to use
Parameters
----------
seed : int, None
Fixing the seed for the ConfigSpace.ConfigurationSpace
Returns
-------
ConfigSpace.ConfigurationSpace
"""
# seed = seed if seed is not None else np.random.randint(1, 100000)
# fidel_space = CS.ConfigurationSpace(seed=seed)
# fidel_space.add_hyperparameters([
# CS.UniformFloatHyperparameter("dataset_fraction", lower=0.0, upper=1.0, default_value=1.0, log=False),
# ])
fs = FidelitySpace([])
return fs
def get_meta_information(self):
""" Returns the meta information for the benchmark """
return {'name': 'Support Vector Machine',
'references': ["@InProceedings{pmlr-v54-klein17a",
"author = {Aaron Klein and Stefan Falkner and Simon Bartels and Philipp Hennig and "
"Frank Hutter}, "
"title = {{Fast Bayesian Optimization of Machine Learning Hyperparameters on "
"Large Datasets}}"
"pages = {528--536}, year = {2017},"
"editor = {Aarti Singh and Jerry Zhu},"
"volume = {54},"
"series = {Proceedings of Machine Learning Research},"
"address = {Fort Lauderdale, FL, USA},"
"month = {20--22 Apr},"
"publisher = {PMLR},"
"pdf = {http://proceedings.mlr.press/v54/klein17a/klein17a.pdf}, "
"url = {http://proceedings.mlr.press/v54/klein17a.html}, "
],
'code': 'https://github.com/automl/HPOlib1.5/blob/container/hpolib/benchmarks/ml/svm_benchmark.py',
'shape of train data': self.x_train.shape,
'shape of test data': self.x_test.shape,
'shape of valid data': self.x_valid.shape,
'initial random seed': self.seed,
'task_id': self.task_id
}
def get_objectives(self) -> Dict:
return {'train_loss': 'minimize'}
def get_problem_type(self):
return "hpo"
if __name__ == '__main__':
task_lists = [167149, 167152, 126029, 167178, 167177]
problem = SupportVectorMachine(task_name='svm', budget_type='fes', budget=10, seed=0, workload=0)
a = problem.f({'C':0.2, 'gamma':-0.3})
print(a)
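`objective_function` searches `C` and `gamma` in log space `[-10, 10]` and maps them back with `np.exp` before fitting the SVC. A minimal sketch of that decoding step (the helper name `decode_svm_config` is illustrative, not part of the benchmark API):

```python
import numpy as np

def decode_svm_config(configuration):
    """Map log-scale search values back to SVC's native C and gamma."""
    return {'C': float(np.exp(configuration['C'])),
            'gamma': float(np.exp(configuration['gamma']))}

params = decode_svm_config({'C': 0.0, 'gamma': -2.0})
# C = exp(0) = 1.0; gamma = exp(-2) ~ 0.135
```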
================================================
FILE: transopt/benchmark/HPO/HPOXGBoost.py
================================================
import os
import time
import logging
import torch
import numpy as np
import xgboost as xgb
from typing import Union, Tuple, Dict, List
from sklearn import pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, make_scorer
from sklearn.preprocessing import OneHotEncoder
from transopt.utils.openml_data_manager import OpenMLHoldoutDataManager
from transopt.space.variable import *
from transopt.agent.registry import problem_registry
from transopt.benchmark.problem_base.non_tab_problem import NonTabularProblem
from transopt.space.search_space import SearchSpace
from transopt.space.fidelity_space import FidelitySpace
from transopt.optimizer.sampler.random import RandomSampler
os.environ['OMP_NUM_THREADS'] = "1"
logger = logging.getLogger('XGBBenchmark')
@problem_registry.register('XGB')
class XGBoostBenchmark(NonTabularProblem):
task_lists = [167149, 167152, 126029, 167178, 167177, 167153, 167154, 167155, 167156]
problem_type = 'hpo'
num_variables = 10
num_objectives = 1
workloads = []
fidelity = None
def __init__(
self, task_name, budget_type, budget, seed, workload, **kwargs
):
"""
Parameters
----------
task_id : int, None
n_threads : int, None
seed : np.random.RandomState, int, None
"""
super(XGBoostBenchmark, self).__init__(
task_name=task_name,
budget=budget,
budget_type=budget_type,
seed=seed,
workload=workload,
)
self.task_id = XGBoostBenchmark.task_lists[workload]
if torch.cuda.is_available():
self.device = torch.device('cuda')
else:
self.device = torch.device('cpu')
self.n_threads = 1
self.budget = budget
self.accuracy_scorer = make_scorer(accuracy_score)
self.x_train, self.y_train, self.x_valid, self.y_valid, self.x_test, self.y_test, variable_types = \
self.get_data()
self.categorical_data = np.array([var_type == 'categorical' for var_type in variable_types])
# XGB needs sorted data: columns should be ordered (categorical first, then numerical), not mixed.
categorical_idx = np.argwhere(self.categorical_data)
continuous_idx = np.argwhere(~self.categorical_data)
sorting = np.concatenate([categorical_idx, continuous_idx]).squeeze()
self.categorical_data = self.categorical_data[sorting]
self.x_train = self.x_train[:, sorting]
self.x_valid = self.x_valid[:, sorting]
self.x_test = self.x_test[:, sorting]
nan_columns = np.all(np.isnan(self.x_train), axis=0)
self.categorical_data = self.categorical_data[~nan_columns]
self.x_train, self.x_valid, self.x_test, self.categories = \
OpenMLHoldoutDataManager.replace_nans_in_cat_columns(self.x_train, self.x_valid, self.x_test,
is_categorical=self.categorical_data)
# Determine the number of categories in the labels.
# In case of binary classification ``self.num_class`` has to be 1 for xgboost.
self.num_class = len(np.unique(np.concatenate([self.y_train, self.y_test, self.y_valid])))
self.num_class = 1 if self.num_class == 2 else self.num_class
self.train_idx = np.random.choice(a=np.arange(len(self.x_train)),
size=len(self.x_train),
replace=False)
# Similar to [Fast Bayesian Optimization of Machine Learning Hyperparameters on Large Datasets]
# (https://arxiv.org/pdf/1605.07079.pdf),
# use 10 times the number of classes as a lower bound for the dataset fraction
n_classes = np.unique(self.y_train).shape[0]
self.lower_bound_train_size = (10 * n_classes) / self.x_train.shape[0]
def get_data(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, List]:
""" Loads the data given a task or another source. """
assert self.task_id is not None, NotImplementedError('No task-id given. Please either specify a task-id or '
'overwrite the get_data Method.')
data_manager = OpenMLHoldoutDataManager(openml_task_id=self.task_id, rng=self.seed)
x_train, y_train, x_val, y_val, x_test, y_test = data_manager.load()
return x_train, y_train, x_val, y_val, x_test, y_test, data_manager.variable_types
def shuffle_data(self, seed=None):
""" Reshuffle the training indices. If ``seed`` is None, the class-level random state is used. """
random_state = seed if isinstance(seed, np.random.RandomState) else np.random.RandomState(seed)
random_state.shuffle(self.train_idx)
# pylint: disable=arguments-differ
def objective_function(
self,
configuration: Dict,
fidelity: Dict = None,
seed: Union[np.random.RandomState, int, None] = None,
**kwargs,
) -> Dict:
"""
Trains a XGBoost model given a hyperparameter configuration and
evaluates the model on the validation set.
Parameters
----------
configuration : Dict, CS.Configuration
Configuration for the XGBoost model
fidelity: Dict, None
Fidelity parameters for the XGBoost model, check get_fidelity_space(). Uses default (max) value if None.
seed : np.random.RandomState, int, None
Random seed for the benchmark. By default the class-level random seed is used.
To prevent overfitting on a single seed, an ``int`` or ``np.random.RandomState``
can be passed explicitly.
kwargs
Returns
-------
Dict -
function_value : validation loss
cost : time to train and evaluate the model
info : Dict
train_loss : training loss
fidelity : used fidelities in this evaluation
"""
self.seed = seed
# if shuffle:
# self.shuffle_data(self.seed)
start = time.time()
model = self._get_pipeline(**configuration)
model.fit(X=self.x_train, y=self.y_train)
train_loss = 1 - self.accuracy_scorer(model, self.x_train, self.y_train)
val_loss = 1 - self.accuracy_scorer(model, self.x_valid, self.y_valid)
cost = time.time() - start
# return {'function_value': float(val_loss),
# 'cost': cost,
# 'info': {'train_loss': float(train_loss),
# 'fidelity': fidelity}
# }
results = {list(self.objective_info.keys())[0]: float(val_loss)}
for fd_name in self.fidelity_space.fidelity_names:
results[fd_name] = fidelity[fd_name]
return results
# pylint: disable=arguments-differ
def objective_function_test(self, configuration: Union[Dict],
fidelity: Union[Dict, None] = None,
shuffle: bool = False,
seed: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict:
"""
Trains a XGBoost model with a given configuration on both the train
and validation data set and evaluates the model on the test data set.
Parameters
----------
configuration : Dict, CS.Configuration
Configuration for the XGBoost Model
fidelity: Dict, None
Fidelity parameters, check get_fidelity_space(). Uses default (max) value if None.
shuffle : bool
If ``True``, shuffle the training indices. If no ``seed`` is given, use the class random state.
Defaults to ``False``.
seed : np.random.RandomState, int, None
Random seed for the benchmark. By default the class-level random seed is used.
To prevent overfitting on a single seed, an ``int`` or ``np.random.RandomState``
can be passed explicitly.
kwargs
Returns
-------
Dict -
function_value : test loss
cost : time to train and evaluate the model
info : Dict
fidelity : used fidelities in this evaluation
"""
default_dataset_fraction = self.get_fidelity_space().get_hyperparameter('dataset_fraction').default_value
if fidelity['dataset_fraction'] != default_dataset_fraction:
raise NotImplementedError(f'Test error can only be computed for the default '
f'dataset_fraction ({default_dataset_fraction})')
self.seed = seed
if shuffle:
self.shuffle_data(self.seed)
start = time.time()
# Concatenate train and validation data; nan values are imputed with the feature mean inside the pipeline
data = np.concatenate((self.x_train, self.x_valid))
targets = np.concatenate((self.y_train, self.y_valid))
model = self._get_pipeline(**configuration)
model.fit(X=data, y=targets)
test_loss = 1 - self.accuracy_scorer(model, self.x_test, self.y_test)
cost = time.time() - start
return {'function_value': float(test_loss),
'cost': cost,
'info': {'fidelity': fidelity}}
def get_configuration_space(self, seed: Union[int, None] = None):
"""
Creates a ConfigSpace.ConfigurationSpace containing all parameters for
the XGBoost Model
Parameters
----------
seed : int, None
Fixing the seed for the ConfigSpace.ConfigurationSpace
Returns
-------
ConfigSpace.ConfigurationSpace
"""
variables=[Continuous('eta', [-10.0, 0.0]),
Integer('max_depth', [1, 15]),
Continuous('min_child_weight', [0.0, 7.0]),
Continuous('colsample_bytree', [0.01, 1.0]),
Continuous('colsample_bylevel', [0.01, 1.0]),
Continuous('reg_lambda', [-10.0, 10.0]),
Continuous('reg_alpha', [-10.0, 10.0]),
Continuous('subsample_per_it', [0.1, 1.0]),
Integer('n_estimators', [1, 50]),
Continuous('gamma', [0.0, 1.0])]
ss = SearchSpace(variables)
return ss
def get_fidelity_space(self, seed: Union[int, None] = None):
"""
Creates a ConfigSpace.ConfigurationSpace containing all fidelity parameters for
the XGBoost Benchmark
Parameters
----------
seed : int, None
Fixing the seed for the ConfigSpace.ConfigurationSpace
Returns
-------
ConfigSpace.ConfigurationSpace
"""
# seed = seed if seed is not None else np.random.randint(1, 100000)
# fidel_space = CS.ConfigurationSpace(seed=seed)
# fidel_space.add_hyperparameters([
# CS.UniformFloatHyperparameter("dataset_fraction", lower=0.0, upper=1.0, default_value=1.0, log=False),
# CS.UniformIntegerHyperparameter("n_estimators", lower=1, upper=256, default_value=256, log=False)
# ])
# return fidel_space
fs = FidelitySpace([])
return fs
def get_meta_information(self) -> Dict:
""" Returns the meta information for the benchmark """
return {'name': 'XGBoost',
'references': ['@article{probst2019tunability,'
'title={Tunability: Importance of hyperparameters of machine learning algorithms.},'
'author={Probst, Philipp and Boulesteix, Anne-Laure and Bischl, Bernd},'
'journal={J. Mach. Learn. Res.},'
'volume={20},'
'number={53},'
'pages={1--32},'
'year={2019}'
'}'],
'code': 'https://github.com/automl/HPOlib1.5/blob/development/hpolib/benchmarks/ml/'
'xgboost_benchmark_old.py',
'shape of train data': self.x_train.shape,
'shape of test data': self.x_test.shape,
'shape of valid data': self.x_valid.shape,
'initial random seed': self.seed,
'task_id': self.task_id
}
def _get_pipeline(self, max_depth: int, eta: float, min_child_weight: int,
colsample_bytree: float, colsample_bylevel: float, reg_lambda: int, reg_alpha: int,
n_estimators: int, subsample_per_it: float, gamma: float) \
-> pipeline.Pipeline:
""" Create the scikit-learn (training-)pipeline """
objective = 'binary:logistic' if self.num_class <= 2 else 'multi:softmax'
if torch.cuda.is_available():
clf = pipeline.Pipeline([
('preprocess_impute',
ColumnTransformer([
("categorical", "passthrough", self.categorical_data),
("continuous", SimpleImputer(strategy="mean"), ~self.categorical_data)])),
('preprocess_one_hot',
ColumnTransformer([
("categorical", OneHotEncoder(categories=self.categories, sparse=False), self.categorical_data),
("continuous", "passthrough", ~self.categorical_data)])),
('xgb',
xgb.XGBClassifier(
max_depth=max_depth,
learning_rate=np.exp2(eta),
min_child_weight=np.exp2(min_child_weight),
colsample_bytree=colsample_bytree,
colsample_bylevel=colsample_bylevel,
reg_alpha=np.exp2(reg_alpha),
reg_lambda=np.exp2(reg_lambda),
n_estimators=n_estimators,
objective=objective,
n_jobs=self.n_threads,
random_state=self.seed,
num_class=self.num_class,
subsample=subsample_per_it,
gamma=gamma,
tree_method='gpu_hist',
gpu_id=0
))
])
else:
clf = pipeline.Pipeline([
('preprocess_impute',
ColumnTransformer([
("categorical", "passthrough", self.categorical_data),
("continuous", SimpleImputer(strategy="mean"), ~self.categorical_data)])),
('preprocess_one_hot',
ColumnTransformer([
("categorical", OneHotEncoder(categories=self.categories), self.categorical_data),
("continuous", "passthrough", ~self.categorical_data)])),
('xgb',
xgb.XGBClassifier(
max_depth=max_depth,
learning_rate=np.exp2(eta),
min_child_weight=np.exp2(min_child_weight),
colsample_bytree=colsample_bytree,
colsample_bylevel=colsample_bylevel,
reg_alpha=np.exp2(reg_alpha),
reg_lambda=np.exp2(reg_lambda),
n_estimators=n_estimators,
objective=objective,
n_jobs=self.n_threads,
random_state=self.seed,
num_class=self.num_class,
subsample=subsample_per_it,
gamma=gamma,
))
])
return clf
def get_objectives(self) -> Dict:
return {'train_loss': 'minimize'}
def get_problem_type(self):
return "hpo"
# def get_var_range(self):
# return {'eta':[-10,0], 'max_depth':[1, 15], 'min_child_weight':[0, 7], 'colsample_bytree':[0.01, 1.0], 'colsample_bylevel':[0.01, 1.0],
# 'reg_lambda':[-10, 10], 'reg_alpha':[-10, 10], 'subsample_per_it':[0.1, 1.0], 'n_estimators':[1, 50], 'gamma':[0,1.0]}
#
#
# def get_var_type(self):
# return {'eta':'exp2', 'max_depth':'int', 'min_child_weight':'exp2', 'colsample_bytree':'float','colsample_bylevel':'float',
# 'reg_lambda':'exp2', 'reg_alpha':'exp2', 'subsample_per_it':'float', 'n_estimators':'int', 'gamma':'float'}
if __name__ == '__main__':
task_lists = [167149, 167152, 126029, 167178, 167177, 167153, 167154, 167155, 167156]
workload = 8
    problem = XGBoostBenchmark(task_name='XGB', budget=20, budget_type='fes', workload=workload, seed=0)
    sampler = RandomSampler(3000, config=None)
    space = problem.configuration_space
    samples = sampler.sample(space, 3000)
parameters = [space.map_to_design_space(sample) for sample in samples]
import tqdm
for para_id in tqdm.tqdm(range(len(parameters))):
parameters[para_id]['score'] = problem.f(parameters[para_id])['train_loss']
import pandas as pd
df = pd.DataFrame(parameters)
df.to_csv(f'XGB_{workload}.csv')
# a = problem.f({'eta':-0.2, 'max_depth':5, 'min_child_weight':2, 'colsample_bytree':0.4, 'colsample_bylevel':0.4,
# 'reg_lambda':0.5, 'reg_alpha':-0.2, 'subsample_per_it':0.7, 'n_estimators':20, 'gamma':0.9})
================================================
FILE: transopt/benchmark/HPO/__init__.py
================================================
from transopt.benchmark.HPO.HPOSVM import SupportVectorMachine
from transopt.benchmark.HPO.HPOXGBoost import XGBoostBenchmark
from transopt.benchmark.HPOOOD.hpoood import ERMOOD
================================================
FILE: transopt/benchmark/HPO/algorithms.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import copy
from collections import OrderedDict
import numpy as np
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
import torchvision.models
from transopt.benchmark.HPO import networks
from sklearn.linear_model import SGDClassifier
import pyro
import pyro.distributions as dist
from pyro.infer import SVI, Trace_ELBO
from pyro.optim import SGD
from transopt.benchmark.HPO.augmentation import mixup_data, mixup_criterion
ALGORITHMS = [
'ERM',
'GLMNet',
'BayesianNN',
]
def get_algorithm_class(algorithm_name):
"""Return the algorithm class with the given name."""
if algorithm_name not in globals():
raise NotImplementedError("Algorithm not found: {}".format(algorithm_name))
return globals()[algorithm_name]
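The lookup above resolves an algorithm name to its class through the module's `globals()` table. A minimal self-contained sketch of the same pattern (with stand-in classes and a plain dict playing the role of `globals()`, not TransOPT's real registry):

```python
# Minimal sketch of the name-to-class lookup pattern used by
# get_algorithm_class. Foo/Bar are stand-ins, not TransOPT algorithms.
class Foo:
    pass

class Bar:
    pass

_REGISTRY = {'Foo': Foo, 'Bar': Bar}  # plays the role of this module's globals()

def lookup(name):
    # Same error contract as get_algorithm_class: unknown names raise
    # NotImplementedError rather than returning None.
    if name not in _REGISTRY:
        raise NotImplementedError("Algorithm not found: {}".format(name))
    return _REGISTRY[name]
```

The advantage of this pattern is that adding a new algorithm class to the module automatically makes it resolvable by name, at the cost of coupling the public API to module-level names.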
class Algorithm(torch.nn.Module):
"""
A subclass of Algorithm implements a domain generalization algorithm.
Subclasses should implement the following:
- update()
- predict()
"""
def __init__(self, input_shape, num_classes, architecture, model_size, mixup, device, hparams):
super(Algorithm, self).__init__()
self.hparams = hparams
self.architecture = architecture
self.model_size = model_size
self.device = device
self.mixup = mixup
if self.mixup:
self.mixup_alpha = self.hparams.get('mixup_alpha', 0.3)
def update(self, minibatches, unlabeled=None):
"""
Perform one update step, given a list of (x, y) tuples for all
environments.
Admits an optional list of unlabeled minibatches from the test domains,
when task is domain_adaptation.
"""
raise NotImplementedError
def predict(self, x):
raise NotImplementedError
class ERM(Algorithm):
"""
Empirical Risk Minimization (ERM)
"""
def __init__(self, input_shape, num_classes, architecture, model_size, mixup, device, hparams):
super(ERM, self).__init__(input_shape, num_classes, architecture, model_size, mixup, device, hparams)
self.featurizer = networks.Featurizer(input_shape, architecture, model_size, self.hparams)
print(self.featurizer.n_outputs)
self.classifier = networks.Classifier(
self.featurizer.n_outputs,
num_classes,
self.hparams['dropout_rate'],
self.hparams['nonlinear_classifier'])
self.network = nn.Sequential(self.featurizer, self.classifier)
self.optimizer = torch.optim.SGD(
self.network.parameters(),
lr=self.hparams["lr"],
weight_decay=self.hparams['weight_decay'],
momentum=self.hparams['momentum']
)
def update(self, minibatches, unlabeled=None):
all_x = torch.cat([x for x, y in minibatches])
all_y = torch.cat([y for x, y in minibatches])
if self.mixup:
all_x, all_y_a, all_y_b, lam = mixup_data(all_x, all_y, self.mixup_alpha, self.device)
all_x, all_y_a, all_y_b = map(torch.autograd.Variable, (all_x, all_y_a, all_y_b))
predictions = self.predict(all_x)
if self.mixup:
loss = mixup_criterion(F.cross_entropy, predictions, all_y_a, all_y_b, lam)
else:
loss = F.cross_entropy(predictions, all_y)
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
if self.mixup:
correct = (lam * predictions.argmax(1).eq(all_y_a).float() +
(1 - lam) * predictions.argmax(1).eq(all_y_b).float()).sum().item()
else:
correct = (predictions.argmax(1) == all_y).sum().item()
return {'loss': loss.item(), 'correct': correct}
def predict(self, x):
return self.network(x)
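Under mixup, `ERM.update` counts a prediction as "lam correct" when it matches the first label of the mixed pair and "(1 - lam) correct" when it matches the second. A numpy sketch of that soft-accuracy bookkeeping (function name and shapes are illustrative, not TransOPT's API):

```python
import numpy as np

# Sketch of the lam-weighted "soft correct" count ERM.update uses when
# mixup is enabled: each prediction earns credit lam for matching y_a
# and (1 - lam) for matching y_b.
def mixup_correct(pred, y_a, y_b, lam):
    pred, y_a, y_b = map(np.asarray, (pred, y_a, y_b))
    return float((lam * (pred == y_a) + (1 - lam) * (pred == y_b)).sum())
```

With `lam = 1.0` this reduces to the plain accuracy count used in the non-mixup branch.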
class GLMNet(Algorithm):
"""
Generalized Linear Model with Elastic Net Regularization (GLMNet)
"""
def __init__(self, input_shape, num_classes, architecture, model_size, mixup, device, hparams):
super(GLMNet, self).__init__(input_shape, num_classes, architecture, model_size, mixup, device, hparams)
self.featurizer = networks.Featurizer(input_shape, architecture, model_size, self.hparams)
self.num_classes = num_classes
        # Use SGDClassifier as the GLMNet surrogate
        self.classifier = SGDClassifier(
            loss='log',  # log loss for classification (renamed 'log_loss' in scikit-learn >= 1.1)
            penalty='elasticnet',  # elastic-net regularization
            alpha=self.hparams['glmnet_alpha'],  # regularization strength
            l1_ratio=self.hparams['glmnet_l1_ratio'],  # share of L1 in the penalty
            learning_rate='optimal',
            max_iter=1,  # a single pass per update call
            warm_start=True,  # allow incremental learning
            random_state=self.hparams['random_seed']
        )
self.optimizer = torch.optim.SGD(
self.featurizer.parameters(),
lr=self.hparams["lr"],
weight_decay=self.hparams['weight_decay'],
momentum=self.hparams['momentum']
)
def update(self, minibatches, unlabeled=None):
all_x = torch.cat([x for x, y in minibatches])
all_y = torch.cat([y for x, y in minibatches])
        # Extract features
        features = self.featurizer(all_x).detach().cpu().numpy()
        labels = all_y.cpu().numpy()
        # Update the GLMNet classifier incrementally
        self.classifier.partial_fit(features, labels, classes=np.arange(self.num_classes))
        # Compute a loss for logging only (not backpropagated)
        loss = -self.classifier.score(features, labels)
        # Update the feature extractor
self.optimizer.zero_grad()
features = self.featurizer(all_x)
logits = torch.tensor(self.classifier.decision_function(features.detach().cpu().numpy())).to(all_x.device)
feature_loss = F.cross_entropy(logits, all_y)
feature_loss.backward()
self.optimizer.step()
return {'loss': loss, 'feature_loss': feature_loss.item()}
def predict(self, x):
features = self.featurizer(x).detach().cpu().numpy()
return torch.tensor(self.classifier.predict_proba(features)).to(x.device)
class BayesianNN(Algorithm):
"""
Two-layer Bayesian Neural Network
"""
    def __init__(self, input_shape, num_classes, hparams):
        # Algorithm.__init__ expects (input_shape, num_classes, architecture,
        # model_size, mixup, device, hparams); BayesianNN has no featurizer,
        # so architecture/model_size/device are None and mixup is disabled.
        super(BayesianNN, self).__init__(input_shape, num_classes, None, None, False, None, hparams)
self.input_dim = input_shape[0] * input_shape[1] * input_shape[2]
self.hidden_dim1 = hparams['bayesian_hidden_dim1']
self.hidden_dim2 = hparams['bayesian_hidden_dim2']
self.output_dim = num_classes
self.num_samples = hparams['bayesian_num_samples']
# Initialize parameters
        self.w1_mu = nn.Parameter(torch.randn(self.input_dim, self.hidden_dim1))
        self.w1_sigma = nn.Parameter(torch.randn(self.input_dim, self.hidden_dim1))
        self.w2_mu = nn.Parameter(torch.randn(self.hidden_dim2, self.output_dim))
        self.w2_sigma = nn.Parameter(torch.randn(self.hidden_dim2, self.output_dim))
# Setup Pyro optimizer
self.optimizer = SGD({
"lr": hparams["step_length"],
"weight_decay": hparams["weight_decay"],
"momentum": hparams["momentum"]
})
self.svi = SVI(self.model, self.guide, self.optimizer, loss=Trace_ELBO())
self.burn_in = hparams['burn_in']
self.step_count = 0
def model(self, x, y=None):
# First layer
w1 = pyro.sample("w1", dist.Normal(self.w1_mu, torch.exp(self.w1_sigma)).to_event(2))
h = F.relu(x @ w1)
# Second layer
w2 = pyro.sample("w2", dist.Normal(self.w2_mu, torch.exp(self.w2_sigma)).to_event(2))
logits = h @ w2
# Observe data
        with pyro.plate("data", x.shape[0]):
            pyro.sample("obs", dist.Categorical(logits=logits), obs=y)
        return logits  # return logits so predict()'s wrapped_model can reuse the forward pass
def guide(self, x, y=None):
# First layer
w1 = pyro.sample("w1", dist.Normal(self.w1_mu, torch.exp(self.w1_sigma)).to_event(2))
# Second layer
w2 = pyro.sample("w2", dist.Normal(self.w2_mu, torch.exp(self.w2_sigma)).to_event(2))
def update(self, minibatches, unlabeled=None):
all_x = torch.cat([x.view(x.size(0), -1) for x, y in minibatches])
all_y = torch.cat([y for x, y in minibatches])
# Perform SVI step
loss = self.svi.step(all_x, all_y)
self.step_count += 1
return {'loss': loss}
def predict(self, x):
x = x.view(x.size(0), -1)
num_samples = self.num_samples
if self.step_count <= self.burn_in:
# During burn-in, use point estimates
w1 = self.w1_mu
w2 = self.w2_mu
h = F.relu(x @ w1)
logits = h @ w2
return F.softmax(logits, dim=-1)
else:
# After burn-in, use full Bayesian prediction
def wrapped_model(x_data):
pyro.sample("prediction", dist.Categorical(logits=self.model(x_data)))
posterior = pyro.infer.Predictive(wrapped_model, guide=self.guide, num_samples=num_samples)(x)
predictions = posterior["prediction"]
return predictions.float().mean(0)
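The prediction path above averages over posterior weight samples. The core idea can be sketched without Pyro: draw several weight matrices from the variational Normal, run the two-layer ReLU network, and average the class probabilities. This is a hedged numpy illustration with made-up shapes, not the class's actual implementation:

```python
import numpy as np

# Sketch of the posterior-averaging idea behind BayesianNN.predict:
# sample weights from Normal(mu, sigma), forward through the two-layer
# net, and average the resulting class probabilities over samples.
def softmax(z):
    z = z - z.max(axis=-1, keepdims=True)
    e = np.exp(z)
    return e / e.sum(axis=-1, keepdims=True)

def bayes_predict(x, w1_mu, w1_sigma, w2_mu, w2_sigma, num_samples, rng):
    probs = []
    for _ in range(num_samples):
        w1 = rng.normal(w1_mu, w1_sigma)   # sample first-layer weights
        w2 = rng.normal(w2_mu, w2_sigma)   # sample second-layer weights
        h = np.maximum(x @ w1, 0.0)        # ReLU hidden layer
        probs.append(softmax(h @ w2))
    return np.mean(probs, axis=0)          # model-averaged class probabilities
```

Setting the sigmas to zero collapses this to the point-estimate forward pass used during burn-in.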
================================================
FILE: transopt/benchmark/HPO/augmentation.py
================================================
import torch
import numpy as np
import random
from transopt.benchmark.HPO.image_options import *
def mixup_data(x, y, alpha=0.3, device='cpu'):
'''Returns mixed inputs, pairs of targets, and lambda'''
if alpha > 0:
lam = np.random.beta(alpha, alpha)
else:
lam = 1
batch_size = x.size()[0]
index = torch.randperm(batch_size).to(device)
print('mixup in the device:', device)
mixed_x = lam * x + (1 - lam) * x[index, :]
y_a, y_b = y, y[index]
return mixed_x, y_a, y_b, lam
def mixup_criterion(criterion, pred, y_a, y_b, lam):
return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)
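The two functions above implement standard mixup: each input becomes a convex combination of itself and a randomly permuted partner, and the loss is the same convex combination of the two labels' losses. A numpy sketch of the data-mixing step (torch-free, for illustration only):

```python
import numpy as np

# Numpy sketch of what mixup_data computes: lam ~ Beta(alpha, alpha),
# then a convex combination of each example with a permuted partner,
# returning both partners' labels so the loss can be mixed the same way.
def mixup_numpy(x, y, alpha, rng):
    lam = rng.beta(alpha, alpha) if alpha > 0 else 1.0
    index = rng.permutation(len(x))
    mixed_x = lam * x + (1 - lam) * x[index]
    return mixed_x, y, y[index], lam
```

With `alpha = 0` the mixing degenerates to the identity (`lam = 1`), which matches the `else` branch of `mixup_data`.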
class Cutout(object):
"""Randomly mask out one or more patches from an image.
Args:
n_holes (int): Number of patches to cut out of each image.
length (int): The length (in pixels) of each square patch.
"""
def __init__(self, n_holes = None, length = None):
if n_holes is None:
self.n_holes = 1
else:
self.n_holes = n_holes
if length is None:
self.length = 16
else:
self.length = length
def __call__(self, img):
"""
Args:
img (Tensor): Tensor image of size (C, H, W).
Returns:
Tensor: Image with n_holes of dimension length x length cut out of it.
"""
h = img.size(1)
w = img.size(2)
mask = np.ones((h, w), np.float32)
for n in range(self.n_holes):
y = np.random.randint(h)
x = np.random.randint(w)
y1 = np.clip(y - self.length // 2, 0, h)
y2 = np.clip(y + self.length // 2, 0, h)
x1 = np.clip(x - self.length // 2, 0, w)
x2 = np.clip(x + self.length // 2, 0, w)
mask[y1: y2, x1: x2] = 0.
mask = torch.from_numpy(mask)
mask = mask.expand_as(img)
img = img * mask
return img
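The masking logic in `Cutout.__call__` zeroes a `length` x `length` square centered at a random pixel, with `np.clip` shrinking the square when it overhangs the border. A small numpy sketch of just the mask construction (same clipping arithmetic, no torch):

```python
import numpy as np

# Sketch of Cutout's mask: a square of zeros of side `length` centered at
# (y, x), clipped at the image borders exactly as Cutout.__call__ does.
def cutout_mask(h, w, y, x, length):
    mask = np.ones((h, w), np.float32)
    y1, y2 = np.clip(y - length // 2, 0, h), np.clip(y + length // 2, 0, h)
    x1, x2 = np.clip(x - length // 2, 0, w), np.clip(x + length // 2, 0, w)
    mask[y1:y2, x1:x2] = 0.0
    return mask
```

A centered hole removes `length**2` pixels; a hole at a corner is clipped to a quarter of that.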
class ImageNetPolicy(object):
""" Randomly choose one of the best 24 Sub-policies on ImageNet.
Example:
>>> policy = ImageNetPolicy()
>>> transformed = policy(image)
Example as a PyTorch Transform:
>>> transform = transforms.Compose([
>>> transforms.Resize(256),
>>> ImageNetPolicy(),
>>> transforms.ToTensor()])
"""
def __init__(self, fillcolor=(128, 128, 128)):
self.policies = [
SubPolicy(0.4, "posterize", 8, 0.6, "rotate", 9, fillcolor),
SubPolicy(0.6, "solarize", 5, 0.6, "autocontrast", 5, fillcolor),
SubPolicy(0.8, "equalize", 8, 0.6, "equalize", 3, fillcolor),
SubPolicy(0.6, "posterize", 7, 0.6, "posterize", 6, fillcolor),
SubPolicy(0.4, "equalize", 7, 0.2, "solarize", 4, fillcolor),
SubPolicy(0.4, "equalize", 4, 0.8, "rotate", 8, fillcolor),
SubPolicy(0.6, "solarize", 3, 0.6, "equalize", 7, fillcolor),
SubPolicy(0.8, "posterize", 5, 1.0, "equalize", 2, fillcolor),
SubPolicy(0.2, "rotate", 3, 0.6, "solarize", 8, fillcolor),
SubPolicy(0.6, "equalize", 8, 0.4, "posterize", 6, fillcolor),
SubPolicy(0.8, "rotate", 8, 0.4, "color", 0, fillcolor),
SubPolicy(0.4, "rotate", 9, 0.6, "equalize", 2, fillcolor),
SubPolicy(0.0, "equalize", 7, 0.8, "equalize", 8, fillcolor),
SubPolicy(0.6, "invert", 4, 1.0, "equalize", 8, fillcolor),
SubPolicy(0.6, "color", 4, 1.0, "contrast", 8, fillcolor),
SubPolicy(0.8, "rotate", 8, 1.0, "color", 2, fillcolor),
SubPolicy(0.8, "color", 8, 0.8, "solarize", 7, fillcolor),
SubPolicy(0.4, "sharpness", 7, 0.6, "invert", 8, fillcolor),
SubPolicy(0.6, "shearX", 5, 1.0, "equalize", 9, fillcolor),
SubPolicy(0.4, "color", 0, 0.6, "equalize", 3, fillcolor),
SubPolicy(0.4, "equalize", 7, 0.2, "solarize", 4, fillcolor),
SubPolicy(0.6, "solarize", 5, 0.6, "autocontrast", 5, fillcolor),
SubPolicy(0.6, "invert", 4, 1.0, "equalize", 8, fillcolor),
SubPolicy(0.6, "color", 4, 1.0, "contrast", 8, fillcolor),
SubPolicy(0.8, "equalize", 8, 0.6, "equalize", 3, fillcolor)
]
def __call__(self, img):
policy_idx = random.randint(0, len(self.policies) - 1)
return self.policies[policy_idx](img)
def __repr__(self):
return "AutoAugment ImageNet Policy"
class CIFAR10Policy(object):
""" Randomly choose one of the best 25 Sub-policies on CIFAR10.
Example:
>>> policy = CIFAR10Policy()
>>> transformed = policy(image)
Example as a PyTorch Transform:
>>> transform=transforms.Compose([
>>> transforms.Resize(256),
>>> CIFAR10Policy(),
>>> transforms.ToTensor()])
"""
def __init__(self, fillcolor=(128, 128, 128)):
self.policies = [
SubPolicy(0.1, "invert", 7, 0.2, "contrast", 6, fillcolor),
SubPolicy(0.7, "rotate", 2, 0.3, "translateX", 9, fillcolor),
SubPolicy(0.8, "sharpness", 1, 0.9, "sharpness", 3, fillcolor),
SubPolicy(0.5, "shearY", 8, 0.7, "translateY", 9, fillcolor),
SubPolicy(0.5, "autocontrast", 8, 0.9, "equalize", 2, fillcolor),
SubPolicy(0.2, "shearY", 7, 0.3, "posterize", 7, fillcolor),
SubPolicy(0.4, "color", 3, 0.6, "brightness", 7, fillcolor),
SubPolicy(0.3, "sharpness", 9, 0.7, "brightness", 9, fillcolor),
SubPolicy(0.6, "equalize", 5, 0.5, "equalize", 1, fillcolor),
SubPolicy(0.6, "contrast", 7, 0.6, "sharpness", 5, fillcolor),
SubPolicy(0.7, "color", 7, 0.5, "translateX", 8, fillcolor),
SubPolicy(0.3, "equalize", 7, 0.4, "autocontrast", 8, fillcolor),
SubPolicy(0.4, "translateY", 3, 0.2, "sharpness", 6, fillcolor),
SubPolicy(0.9, "brightness", 6, 0.2, "color", 8, fillcolor),
SubPolicy(0.5, "solarize", 2, 0.0, "invert", 3, fillcolor),
SubPolicy(0.2, "equalize", 0, 0.6, "autocontrast", 0, fillcolor),
SubPolicy(0.2, "equalize", 8, 0.6, "equalize", 4, fillcolor),
SubPolicy(0.9, "color", 9, 0.6, "equalize", 6, fillcolor),
SubPolicy(0.8, "autocontrast", 4, 0.2, "solarize", 8, fillcolor),
SubPolicy(0.1, "brightness", 3, 0.7, "color", 0, fillcolor),
SubPolicy(0.4, "solarize", 5, 0.9, "autocontrast", 3, fillcolor),
SubPolicy(0.9, "translateY", 9, 0.7, "translateY", 9, fillcolor),
SubPolicy(0.9, "autocontrast", 2, 0.8, "solarize", 3, fillcolor),
SubPolicy(0.8, "equalize", 8, 0.1, "invert", 3, fillcolor),
SubPolicy(0.7, "translateY", 9, 0.9, "autocontrast", 1, fillcolor)
]
def __call__(self, img):
policy_idx = random.randint(0, len(self.policies) - 1)
return self.policies[policy_idx](img)
def __repr__(self):
return "AutoAugment CIFAR10 Policy"
class CIFAR10PolicyPhotometric(object):
""" Randomly choose one of the best 25 Sub-policies on CIFAR10.
Example:
>>> policy = CIFAR10Policy()
>>> transformed = policy(image)
Example as a PyTorch Transform:
>>> transform=transforms.Compose([
>>> transforms.Resize(256),
>>> CIFAR10Policy(),
>>> transforms.ToTensor()])
"""
def __init__(self, fillcolor=(128, 128, 128)):
self.policies = [
SubPolicy(0.1, "invert", 7, 0.2, "contrast", 6, fillcolor),
SubPolicy(0.8, "sharpness", 1, 0.9, "sharpness", 3, fillcolor),
SubPolicy(0.5, "autocontrast", 8, 0.9, "equalize", 2, fillcolor),
SubPolicy(0.4, "color", 3, 0.6, "brightness", 7, fillcolor),
SubPolicy(0.3, "sharpness", 9, 0.7, "brightness", 9, fillcolor),
SubPolicy(0.6, "equalize", 5, 0.5, "equalize", 1, fillcolor),
SubPolicy(0.6, "contrast", 7, 0.6, "sharpness", 5, fillcolor),
SubPolicy(0.7, "color", 7, 0.5, "translateX", 8, fillcolor),
SubPolicy(0.3, "equalize", 7, 0.4, "autocontrast", 8, fillcolor),
SubPolicy(0.9, "brightness", 6, 0.2, "color", 8, fillcolor),
SubPolicy(0.5, "solarize", 2, 0.0, "invert", 3, fillcolor),
SubPolicy(0.2, "equalize", 0, 0.6, "autocontrast", 0, fillcolor),
SubPolicy(0.2, "equalize", 8, 0.6, "equalize", 4, fillcolor),
SubPolicy(0.9, "color", 9, 0.6, "equalize", 6, fillcolor),
SubPolicy(0.8, "autocontrast", 4, 0.2, "solarize", 8, fillcolor),
SubPolicy(0.1, "brightness", 3, 0.7, "color", 0, fillcolor),
SubPolicy(0.4, "solarize", 5, 0.9, "autocontrast", 3, fillcolor),
SubPolicy(0.9, "autocontrast", 2, 0.8, "solarize", 3, fillcolor),
SubPolicy(0.8, "equalize", 8, 0.1, "invert", 3, fillcolor),
]
def __call__(self, img):
policy_idx = random.randint(0, len(self.policies) - 1)
return self.policies[policy_idx](img)
def __repr__(self):
return "AutoAugment CIFAR10 Photometric Policy"
class CIFAR10PolicyGeometric(object):
""" Randomly choose one of the best 25 Sub-policies on CIFAR10.
Example:
>>> policy = CIFAR10Policy()
>>> transformed = policy(image)
Example as a PyTorch Transform:
>>> transform=transforms.Compose([
>>> transforms.Resize(256),
>>> CIFAR10Policy(),
>>> transforms.ToTensor()])
"""
def __init__(self, fillcolor=(128, 128, 128)):
self.policies = [
SubPolicy(0.7, "rotate", 2, 0.3, "translateX", 9, fillcolor),
SubPolicy(0.5, "shearY", 8, 0.7, "translateY", 9, fillcolor),
SubPolicy(0.5, "shearX", 7, 0.3, "posterize", 7, fillcolor),
SubPolicy(0.9, "translateY", 9, 0.7, "translateY", 9, fillcolor),
SubPolicy(0.7, "translateX", 9, 0.9, "autocontrast", 1, fillcolor)
]
def __call__(self, img):
policy_idx = random.randint(0, len(self.policies) - 1)
return self.policies[policy_idx](img)
def __repr__(self):
return "AutoAugment CIFAR10 Geometric Policy"
class SVHNPolicy(object):
""" Randomly choose one of the best 25 Sub-policies on SVHN.
Example:
>>> policy = SVHNPolicy()
>>> transformed = policy(image)
Example as a PyTorch Transform:
>>> transform=transforms.Compose([
>>> transforms.Resize(256),
>>> SVHNPolicy(),
>>> transforms.ToTensor()])
"""
def __init__(self, fillcolor=(128, 128, 128)):
self.policies = [
SubPolicy(0.9, "shearX", 4, 0.2, "invert", 3, fillcolor),
SubPolicy(0.9, "shearY", 8, 0.7, "invert", 5, fillcolor),
SubPolicy(0.6, "equalize", 5, 0.6, "solarize", 6, fillcolor),
SubPolicy(0.9, "invert", 3, 0.6, "equalize", 3, fillcolor),
SubPolicy(0.6, "equalize", 1, 0.9, "rotate", 3, fillcolor),
SubPolicy(0.9, "shearX", 4, 0.8, "autocontrast", 3, fillcolor),
SubPolicy(0.9, "shearY", 8, 0.4, "invert", 5, fillcolor),
SubPolicy(0.9, "shearY", 5, 0.2, "solarize", 6, fillcolor),
SubPolicy(0.9, "invert", 6, 0.8, "autocontrast", 1, fillcolor),
SubPolicy(0.6, "equalize", 3, 0.9, "rotate", 3, fillcolor),
SubPolicy(0.9, "shearX", 4, 0.3, "solarize", 3, fillcolor),
SubPolicy(0.8, "shearY", 8, 0.7, "invert", 4, fillcolor),
SubPolicy(0.9, "equalize", 5, 0.6, "translateY", 6, fillcolor),
SubPolicy(0.9, "invert", 4, 0.6, "equalize", 7, fillcolor),
SubPolicy(0.3, "contrast", 3, 0.8, "rotate", 4, fillcolor),
SubPolicy(0.8, "invert", 5, 0.0, "translateY", 2, fillcolor),
SubPolicy(0.7, "shearY", 6, 0.4, "solarize", 8, fillcolor),
SubPolicy(0.6, "invert", 4, 0.8, "rotate", 4, fillcolor),
SubPolicy(0.3, "shearY", 7, 0.9, "translateX", 3, fillcolor),
SubPolicy(0.1, "shearX", 6, 0.6, "invert", 5, fillcolor),
SubPolicy(0.7, "solarize", 2, 0.6, "translateY", 7, fillcolor),
SubPolicy(0.8, "shearY", 4, 0.8, "invert", 8, fillcolor),
SubPolicy(0.7, "shearX", 9, 0.8, "translateY", 3, fillcolor),
SubPolicy(0.8, "shearY", 5, 0.7, "autocontrast", 3, fillcolor),
SubPolicy(0.7, "shearX", 2, 0.1, "invert", 5, fillcolor)
]
def __call__(self, img):
policy_idx = random.randint(0, len(self.policies) - 1)
return self.policies[policy_idx](img)
def __repr__(self):
return "AutoAugment SVHN Policy"
class SubPolicy(object):
def __init__(self, p1, operation1, magnitude_idx1, p2, operation2, magnitude_idx2, fillcolor=(128, 128, 128)):
ranges = {
"shearX": np.linspace(0, 0.3, 10),
"shearY": np.linspace(0, 0.3, 10),
"translateX": np.linspace(0, 150 / 331, 10),
"translateY": np.linspace(0, 150 / 331, 10),
"rotate": np.linspace(0, 30, 10),
"color": np.linspace(0.0, 0.9, 10),
"posterize": np.round(np.linspace(8, 4, 10), 0).astype(int), # 修改这里
"solarize": np.linspace(256, 0, 10),
"contrast": np.linspace(0.0, 0.9, 10),
"sharpness": np.linspace(0.0, 0.9, 10),
"brightness": np.linspace(0.0, 0.9, 10),
"autocontrast": [0] * 10,
"equalize": [0] * 10,
"invert": [0] * 10
}
func = {
"shearX": ShearX(fillcolor=fillcolor),
"shearY": ShearY(fillcolor=fillcolor),
"translateX": TranslateX(fillcolor=fillcolor),
"translateY": TranslateY(fillcolor=fillcolor),
"rotate": Rotate(),
"color": Color(),
"posterize": Posterize(),
"solarize": Solarize(),
"contrast": Contrast(),
"sharpness": Sharpness(),
"brightness": Brightness(),
"autocontrast": AutoContrast(),
"equalize": Equalize(),
"invert": Invert()
}
self.p1 = p1
self.operation1 = func[operation1]
self.magnitude1 = ranges[operation1][magnitude_idx1]
self.p2 = p2
self.operation2 = func[operation2]
self.magnitude2 = ranges[operation2][magnitude_idx2]
def __call__(self, img):
if random.random() < self.p1:
img = self.operation1(img, self.magnitude1)
if random.random() < self.p2:
img = self.operation2(img, self.magnitude2)
return img
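`SubPolicy.__call__` applies its two operations independently, each gated by its own probability. The control flow can be sketched with string-append stand-ins for the real image transforms:

```python
import random

# Sketch of SubPolicy's control flow: two operations applied
# independently, each with its own probability. The string-append
# "ops" here stand in for the real PIL transforms.
def apply_subpolicy(img, p1, op1, p2, op2, rnd):
    if rnd.random() < p1:
        img = op1(img)
    if rnd.random() < p2:
        img = op2(img)
    return img
```

Because the two coin flips are independent, a sub-policy can apply both, either, or neither of its operations on any given image.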
================================================
FILE: transopt/benchmark/HPO/datasets.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import os
import numpy as np
import torch
from PIL import Image, ImageFile
from torchvision import transforms
from torch.utils.data import TensorDataset, Subset, ConcatDataset, Dataset
from torchvision.datasets import MNIST, ImageNet, CIFAR10, CIFAR100
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from robustbench.data import load_cifar10c, load_cifar100c, load_imagenetc
from transopt.benchmark.HPO.augmentation import ImageNetPolicy, CIFAR10Policy, CIFAR10PolicyGeometric, CIFAR10PolicyPhotometric, Cutout
ImageFile.LOAD_TRUNCATED_IMAGES = True
def data_transform(dataset_name, augmentation_name=None):
if dataset_name.lower() == 'cifar10' or dataset_name.lower() == 'cifar100':
mean = (0.4914, 0.4822, 0.4465)
std = (0.2023, 0.1994, 0.2010)
size = 32
elif dataset_name.lower() == 'imagenet':
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)
size = 224
else:
raise ValueError(f"Unsupported dataset: {dataset_name}")
# transform_list = [transforms.ToPILImage(), transforms.ToTensor(), transforms.Normalize(mean, std)]
transform_list = [transforms.ToPILImage(), transforms.ToTensor()]
if augmentation_name:
if dataset_name.lower() in ['cifar10', 'cifar100']:
            if augmentation_name.lower() == 'cutout':
                # Cutout operates on tensors, so it must run after ToTensor
                transform_list.append(Cutout(n_holes=1, length=16))
elif augmentation_name.lower() == 'geometric':
transform_list.insert(1, CIFAR10PolicyGeometric())
elif augmentation_name.lower() == 'photometric':
transform_list.insert(1, CIFAR10PolicyPhotometric())
elif augmentation_name.lower() == 'autoaugment':
transform_list.insert(1, CIFAR10Policy())
elif augmentation_name.lower() == 'mixup':
print("Mixup should be applied during training, not as part of the transform.")
else:
raise ValueError(f"Unsupported augmentation strategy for CIFAR: {augmentation_name}")
elif dataset_name.lower() == 'imagenet':
if augmentation_name.lower() == 'cutout':
transform_list.append(Cutout())
elif augmentation_name.lower() == 'autoaugment':
transform_list.insert(0, ImageNetPolicy())
elif augmentation_name.lower() == 'mixup':
print("Mixup should be applied during training, not as part of the transform.")
else:
raise ValueError(f"Unsupported augmentation strategy for ImageNet: {augmentation_name}")
else:
raise ValueError(f"Unsupported dataset for augmentation: {dataset_name}")
print(transform_list)
return transforms.Compose(transform_list)
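Ordering matters in the pipeline `data_transform` builds: AutoAugment-style policies operate on PIL images, so they must sit after `ToPILImage` and before `ToTensor`, while Cutout masks tensors and must come after `ToTensor`. A sketch of that ordering rule with strings standing in for the transform objects (the helper name is illustrative):

```python
# Sketch of the pipeline-ordering rule data_transform follows:
# PIL-space policies go at index 1 (after ToPILImage, before ToTensor);
# tensor-space ops like Cutout are appended after ToTensor.
def build_pipeline(policy=None, cutout=False):
    steps = ['ToPILImage', 'ToTensor']
    if policy:
        steps.insert(1, policy)   # PIL-space augmentation
    if cutout:
        steps.append('Cutout')    # tensor-space masking
    return steps
```

Inserting Cutout anywhere before `ToTensor` would hand it a PIL image, whose `.size` is a tuple rather than the tensor method `Cutout.__call__` expects.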
def get_dataset_class(dataset_name):
"""Return the dataset class with the given name."""
if dataset_name not in globals():
raise NotImplementedError("Dataset not found: {}".format(dataset_name))
return globals()[dataset_name]
def num_environments(dataset_name):
return len(get_dataset_class(dataset_name).ENVIRONMENTS)
class Dataset:  # note: shadows torch.utils.data.Dataset imported above; this is a plain container of named splits
N_STEPS = 5001 # Default, subclasses may override
CHECKPOINT_FREQ = 100 # Default, subclasses may override
N_WORKERS = 1 # Default, subclasses may override
ENVIRONMENTS = None # Subclasses should override
INPUT_SHAPE = None # Subclasses should override
def __getitem__(self, index):
return self.datasets[index]
def __len__(self):
return len(self.datasets)
class RobCifar10(Dataset):
    def __init__(self, root=None, augment=False):  # augment: an augmentation name string (e.g. 'cutout') or False/None
super().__init__()
if root is None:
user_home = os.path.expanduser('~')
root = os.path.join(user_home, 'transopt_tmp/data')
# Load original CIFAR-10 dataset
original_dataset_tr = CIFAR10(root, train=True, download=True)
original_dataset_te = CIFAR10(root, train=False, download=True)
original_images = original_dataset_tr.data
original_labels = torch.tensor(original_dataset_tr.targets)
shuffle = torch.randperm(len(original_images))
original_images = original_images[shuffle]
original_labels = original_labels[shuffle]
dataset_transform = data_transform('cifar10', augment)
        normalized_images = data_transform('cifar10', None)  # note: this is a transform (no augmentation), not an image batch
transformed_images = torch.stack([dataset_transform(img) for img in original_images])
standard_test_images = torch.stack([normalized_images(img) for img in original_dataset_te.data])
self.input_shape = (3, 32, 32)
self.num_classes = 10
self.datasets = {}
# Split into train and validation sets
val_size = len(transformed_images) // 10
self.datasets['train'] = TensorDataset(transformed_images[:-val_size], original_labels[:-val_size])
self.datasets['val'] = TensorDataset(transformed_images[-val_size:], original_labels[-val_size:])
# Standard test set
self.datasets['test_standard'] = TensorDataset(standard_test_images, torch.tensor(original_dataset_te.targets))
# Corruption test sets
self.corruptions = [
'gaussian_noise', 'shot_noise', 'impulse_noise', 'defocus_blur',
'glass_blur', 'motion_blur', 'zoom_blur', 'snow', 'frost', 'fog',
'brightness', 'contrast', 'elastic_transform', 'pixelate', 'jpeg_compression'
]
for corruption in self.corruptions:
x_test_corrupt, y_test_corrupt = load_cifar10c(n_examples=5000, corruptions=[corruption], severity=5, data_dir=root)
x_test_corrupt = torch.stack([normalized_images(img) for img in x_test_corrupt])
self.datasets[f'test_corruption_{corruption}'] = TensorDataset(x_test_corrupt, y_test_corrupt)
# Load CIFAR-10.1 dataset
cifar101_path = os.path.join(root, 'cifar10.1_v6_data.npy')
cifar101_labels_path = os.path.join(root, 'cifar10.1_v6_labels.npy')
if os.path.exists(cifar101_path) and os.path.exists(cifar101_labels_path):
cifar101_data = np.load(cifar101_path)
cifar101_labels = np.load(cifar101_labels_path)
cifar101_data = torch.from_numpy(cifar101_data).float() / 255.0
cifar101_data = cifar101_data.permute(0, 3, 1, 2) # Change from (N, 32, 32, 3) to (N, 3, 32, 32)
cifar101_data = torch.stack([normalized_images(img) for img in cifar101_data])
cifar101_labels = torch.from_numpy(cifar101_labels).long()
self.datasets['test_cifar10.1'] = TensorDataset(cifar101_data, cifar101_labels)
else:
print("CIFAR-10.1 dataset not found. Please download it to the data directory.")
# Load CIFAR-10.2 dataset
cifar102_path = os.path.join(root, 'cifar102_test.npz')
if os.path.exists(cifar102_path):
cifar102_data = np.load(cifar102_path)
cifar102_images = cifar102_data['images']
cifar102_labels = cifar102_data['labels']
cifar102_images = torch.from_numpy(cifar102_images).float() / 255.0
cifar102_images = cifar102_images.permute(0, 3, 1, 2) # Change from (N, 32, 32, 3) to (N, 3, 32, 32)
cifar102_images = torch.stack([normalized_images(img) for img in cifar102_images])
cifar102_labels = torch.from_numpy(cifar102_labels).long()
self.datasets['test_cifar10.2'] = TensorDataset(cifar102_images, cifar102_labels)
else:
print("CIFAR-10.2 dataset not found. Please download it to the data directory.")
def get_available_test_set_names(self):
"""
Return a list of available test set names.
"""
return list(self.datasets.keys())
    def get_test_set(self, name):
        """
        Get a specific test set by name.
        Available names: 'test_standard', 'test_corruption_<corruption>',
        'test_cifar10.1', 'test_cifar10.2'
        """
        return self.datasets.get(name, None)
    def get_all_test_sets(self):
        """
        Return all available test sets.
        """
        return {k: v for k, v in self.datasets.items() if k.startswith('test')}
class RobCifar100(Dataset):
    def __init__(self, root=None, augment=False):
super().__init__()
if root is None:
user_home = os.path.expanduser('~')
root = os.path.join(user_home, 'transopt_tmp/data')
original_dataset_tr = CIFAR100(root, train=True, download=True)
original_dataset_te = CIFAR100(root, train=False, download=True)
original_images = original_dataset_tr.data
original_labels = torch.tensor(original_dataset_tr.targets)
shuffle = torch.randperm(len(original_images))
original_images = original_images[shuffle]
original_labels = original_labels[shuffle]
dataset_transform = self.get_transform(augment)
transformed_images = torch.stack([dataset_transform(img) for img in original_images])
self.input_shape = (3, 32, 32)
self.num_classes = 100
self.datasets = TensorDataset(transformed_images, original_labels)
# Standard test set
        test_images = torch.tensor(original_dataset_te.data).float().permute(0, 3, 1, 2) / 255.0  # (N, H, W, C) -> (N, C, H, W)
test_labels = torch.tensor(original_dataset_te.targets)
self.test_sets = {'standard': TensorDataset(test_images, test_labels)}
# Corruption test sets
corruptions = [
'gaussian_noise', 'shot_noise', 'impulse_noise', 'defocus_blur',
'glass_blur', 'motion_blur', 'zoom_blur', 'snow', 'frost', 'fog',
'brightness', 'contrast', 'elastic_transform', 'pixelate', 'jpeg_compression'
]
for corruption in corruptions:
x_test, y_test = load_cifar100c(n_examples=10000, corruptions=[corruption], severity=5, data_dir=root)
self.test_sets[f'corruption_{corruption}'] = TensorDataset(x_test, y_test)
def get_transform(self, augment):
if augment:
return transforms.Compose([
transforms.ToPILImage(),
transforms.RandomCrop(32, padding=4),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761)),
])
else:
return transforms.Compose([
transforms.ToPILImage(),
transforms.ToTensor(),
transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761)),
])
def get_test_set(self, name):
"""
Get a specific test set by name.
Available names: 'standard', 'corruption_'
"""
return self.test_sets.get(name, None)
def get_all_test_sets(self):
"""
Return all available test sets.
"""
return self.test_sets
class RobImageNet(Dataset):
def __init__(self, root, augment=False):
super().__init__()
if root is None:
user_home = os.path.expanduser('~')
root = os.path.join(user_home, 'transopt_tmp/data')
transform = self.get_transform(augment)
self.datasets = ImageNet(root=root, split='train', transform=transform)
self.test_sets = {'standard': ImageNet(root=root, split='val', transform=self.get_transform(False))}
self.input_shape = (3, 224, 224)
self.num_classes = 1000
# Corruption test sets
corruptions = [
'gaussian_noise', 'shot_noise', 'impulse_noise', 'defocus_blur',
'glass_blur', 'motion_blur', 'zoom_blur', 'snow', 'frost', 'fog',
'brightness', 'contrast', 'elastic_transform', 'pixelate', 'jpeg_compression'
]
for corruption in corruptions:
x_test, y_test = load_imagenetc(n_examples=5000, corruptions=[corruption], severity=5, data_dir=root)
self.test_sets[f'corruption_{corruption}'] = TensorDataset(x_test, y_test)
def get_transform(self, augment):
if augment:
print("Data augmentation is enabled.")
return transforms.Compose([
transforms.RandomResizedCrop(224, scale=(0.7, 1.0)),
transforms.RandomHorizontalFlip(),
transforms.ColorJitter(0.3, 0.3, 0.3, 0.3),
transforms.RandomGrayscale(),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
else:
print("Data augmentation is disabled.")
return transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
def get_test_set(self, name):
"""
Get a specific test set by name.
Available names: 'standard' or 'corruption_<corruption_name>'.
"""
return self.test_sets.get(name, None)
def get_all_test_sets(self):
"""
Return all available test sets.
"""
return self.test_sets
def test_dataset(dataset_name='cifar10', num_samples=5):
# Set up the dataset
if dataset_name.lower() == 'cifar10':
dataset = RobCifar10(root=None, augment=True)
else:
raise ValueError(f"Unsupported dataset: {dataset_name}")
# Test training data
assert 'train' in dataset.datasets, "Training dataset is missing"
print(f"Training dataset size: {len(dataset.datasets['train'])}")
train_sample = dataset.datasets['train'][0]
print(f"Training data shape: {train_sample[0].shape}")
print(f"Training label shape: {train_sample[1].shape}")
# Test validation data
assert 'val' in dataset.datasets, "Validation dataset is missing"
print(f"Validation dataset size: {len(dataset.datasets['val'])}")
# Test standard test set
assert 'test_standard' in dataset.datasets, "Standard test set is missing"
print(f"Standard test set size: {len(dataset.datasets['test_standard'])}")
# Test corruption test sets
for corruption in dataset.corruptions[:num_samples]:
corruption_key = f'test_corruption_{corruption}'
assert corruption_key in dataset.datasets, f"Corruption test set '{corruption}' is missing"
print(f"Corruption test set '{corruption}' size: {len(dataset.datasets[corruption_key])}")
# Test additional test sets (CIFAR-10.1 and CIFAR-10.2)
for additional_test in ['test_cifar10.1', 'test_cifar10.2']:
if additional_test in dataset.datasets:
print(f"{additional_test.upper()} test set size: {len(dataset.datasets[additional_test])}")
else:
print(f"{additional_test.upper()} test set not found")
# Test data loading
print("\nTesting data loading:")
for key, data in dataset.datasets.items():
try:
sample = data[0]
print(f"Successfully loaded sample from {key}")
if isinstance(sample, tuple):
print(f" Sample shape: {sample[0].shape}, Label: {sample[1]}")
else:
print(f" Sample shape: {sample.shape}")
except Exception as e:
print(f"Error loading data from {key}: {str(e)}")
print(f"\nAll tests for {dataset_name} passed successfully!")
def visualize_dataset_tsne(dataset_name='cifar10', n_samples=1000, perplexity=30, n_iter=1000):
# Set up data transformation
non_augment = data_transform(dataset_name, augmentation_name=None)
augment = data_transform(dataset_name, augmentation_name='photometric')
# Load dataset
if dataset_name.lower() == 'cifar10':
dataset = RobCifar10(root=None, augment=False)
else:
raise ValueError(f"Unsupported dataset: {dataset_name}")
# Prepare data for t-SNE
all_images = []
all_labels = []
dataset_types = []
for key, data in dataset.datasets.items():
loader = DataLoader(data, batch_size=n_samples, shuffle=True)
images, labels = next(iter(loader))
if key == 'train':
origin_images = torch.stack([non_augment(img) for img in images])
all_images.append(origin_images)
all_labels.append(labels)
dataset_types.extend(['train_without_aug'] * len(origin_images))
augmented_images = torch.stack([augment(img) for img in images])
all_images.append(augmented_images)
all_labels.append(labels)
dataset_types.extend(['augmented'] * len(augmented_images))
continue
if key.startswith('test_') and key != 'test_standard':
all_images.append(images)
all_labels.append(labels)
dataset_types.extend(['test_ds'] * len(images))
# else:
# all_images.append(images)
# all_labels.append(labels)
# dataset_types.extend([key] * len(images))
all_images = torch.cat(all_images, dim=0)
all_labels = torch.cat(all_labels, dim=0)
all_images_flat = all_images.view(all_images.size(0), -1).numpy()
# Apply t-SNE
tsne = TSNE(n_components=2, perplexity=perplexity, n_iter=n_iter, random_state=42)
tsne_results = tsne.fit_transform(all_images_flat)
# Visualize results
plt.figure(figsize=(16, 12))
# Define a fixed color map
fixed_color_map = {
'train_without_aug': '#1f77b4', # blue
'augmented': '#ff7f0e', # orange
'val': '#2ca02c', # green
'test_standard': '#d62728', # red
'test_ds': '#9467bd', # purple
'test_cifar10.1': '#8c564b', # brown
'test_cifar10.2': '#e377c2' # pink
}
for dtype in fixed_color_map.keys():
mask = np.array(dataset_types) == dtype
if np.any(mask): # Only plot if there are data points for this type
plt.scatter(tsne_results[mask, 0], tsne_results[mask, 1],
c=fixed_color_map[dtype], label=dtype, alpha=0.6)
plt.legend()
plt.title(f't-SNE visualization of {dataset_name} dataset')
plt.savefig(f'{dataset_name}_tsne_visualization.png')
plt.close()
print(f"t-SNE visualization has been saved as '{dataset_name}_tsne_visualization.png'")
if __name__ == "__main__":
# test_dataset('cifar10')
# test_dataset('cifar100')
# test_dataset('imagenet')
visualize_dataset_tsne(dataset_name='cifar10', n_samples=1000)
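The dataset classes above all expose their evaluation splits through the same registry pattern: a `test_sets` dict keyed by `'standard'` or `'corruption_<name>'`, with `get_test_set` returning `None` for unknown keys. A minimal stand-alone sketch of that pattern (the string values here are placeholders for the actual `TensorDataset` objects):

```python
# Minimal sketch of the test-set registry used by RobCifar10/RobImageNet.
# The string values stand in for the real TensorDataset objects.
test_sets = {'standard': 'clean test split'}
for corruption in ['gaussian_noise', 'fog']:
    test_sets[f'corruption_{corruption}'] = f'{corruption} at severity 5'

def get_test_set(name):
    # Unknown names yield None instead of raising KeyError.
    return test_sets.get(name, None)

print(get_test_set('corruption_fog'))      # fog at severity 5
print(get_test_set('corruption_unknown'))  # None
```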
================================================
FILE: transopt/benchmark/HPO/fast_data_loader.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import torch
class _InfiniteSampler(torch.utils.data.Sampler):
"""Wraps another Sampler to yield an infinite stream."""
def __init__(self, sampler):
self.sampler = sampler
def __iter__(self):
while True:
for batch in self.sampler:
yield batch
class InfiniteDataLoader:
def __init__(self, dataset, batch_size, num_workers):
super().__init__()
sampler = torch.utils.data.RandomSampler(dataset,
replacement=True)
batch_sampler = torch.utils.data.BatchSampler(
sampler,
batch_size=batch_size,
drop_last=True)
self._infinite_iterator = iter(torch.utils.data.DataLoader(
dataset,
num_workers=num_workers,
batch_sampler=_InfiniteSampler(batch_sampler)
))
def __iter__(self):
while True:
yield next(self._infinite_iterator)
def __len__(self):
raise ValueError
class FastDataLoader:
"""DataLoader wrapper with slightly improved speed by not respawning worker
processes at every epoch."""
def __init__(self, dataset, batch_size, num_workers):
super().__init__()
batch_sampler = torch.utils.data.BatchSampler(
torch.utils.data.RandomSampler(dataset, replacement=False),
batch_size=batch_size,
drop_last=False
)
self._infinite_iterator = iter(torch.utils.data.DataLoader(
dataset,
num_workers=num_workers,
batch_sampler=_InfiniteSampler(batch_sampler)
))
self._length = len(batch_sampler)
def __iter__(self):
for _ in range(len(self)):
yield next(self._infinite_iterator)
def __len__(self):
return self._length
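The `_InfiniteSampler` wrapper above simply restarts its underlying sampler whenever it is exhausted, which is what lets `InfiniteDataLoader` yield batches forever. The same idea in a dependency-free sketch, where a plain list of batch indices stands in for the torch `BatchSampler`:

```python
from itertools import islice

def infinite(sampler):
    # Restart the re-iterable sampler forever, as _InfiniteSampler does.
    while True:
        for batch in sampler:
            yield batch

batch_indices = [[0, 1], [2, 3], [4, 5]]
stream = infinite(batch_indices)
first_seven = list(islice(stream, 7))
# The stream wraps around once the three underlying batches are used up.
print(first_seven[3])  # [0, 1]
```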
================================================
FILE: transopt/benchmark/HPO/hparams_registry.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import numpy as np
def get_hparams(algorithm, dataset, random_seed, model_size=None, architecture='resnet'):
"""
Global registry of hyperparams. Each entry is a (default, random) tuple.
New algorithms / networks / etc. should add entries here.
"""
hparams = {}
hparam_space = get_hparam_space(algorithm, model_size, architecture)
random_state = np.random.RandomState(random_seed)
for name, (hparam_type, range_or_values) in hparam_space.items():
if hparam_type == 'categorical':
default_val = range_or_values[0]
random_val = random_state.choice(range_or_values)
elif hparam_type == 'float':
default_val = sum(range_or_values) / 2
random_val = random_state.uniform(*range_or_values)
elif hparam_type == 'int':
default_val = int(sum(range_or_values) / 2)
random_val = random_state.randint(*range_or_values)
elif hparam_type == 'log':
default_val = 10 ** (sum(range_or_values) / 2)
random_val = 10 ** random_state.uniform(*range_or_values)
else:
raise ValueError(f"Unknown hparam type: {hparam_type}")
hparams[name] = (default_val, random_val)
return hparams
def default_hparams(algorithm, dataset, model_size='small', architecture='resnet'):
return {a: b for a, (b, c) in get_hparams(algorithm, dataset, 0, model_size, architecture).items()}
def random_hparams(algorithm, dataset, seed, model_size='small', architecture='resnet'):
return {a: c for a, (b, c) in get_hparams(algorithm, dataset, seed, model_size, architecture).items()}
def get_hparam_space(algorithm, model_size=None, architecture='resnet'):
"""
Returns a dictionary of hyperparameter spaces for the given algorithm and architecture.
Each entry is a tuple of (type, range) where type is 'float', 'int', 'log', or 'categorical'.
"""
hparam_space = {}
if algorithm in ['ERM', 'GLMNet', 'BayesianNN']:
hparam_space['lr'] = ('log', (-6, -2))
hparam_space['weight_decay'] = ('log', (-7, -4))
hparam_space['momentum'] = ('float', (0.5, 0.999))
hparam_space['batch_size'] = ('categorical', [16, 32, 64, 128])
if algorithm == 'ERM':
# hparam_space['batch_size'] = ('categorical', [16, 32, 64, 128])
hparam_space['dropout_rate'] = ('float', (0, 0.5))
if architecture.lower() == 'cnn':
hparam_space['hidden_dim1'] = ('categorical', [32, 64, 128])
hparam_space['hidden_dim2'] = ('categorical', [32, 64, 128])
if algorithm == 'GLMNet':
hparam_space['glmnet_alpha'] = ('log', (-4, 1))
hparam_space['glmnet_l1_ratio'] = ('float', (0, 1))
if algorithm == 'BayesianNN':
hparam_space['bayesian_num_samples'] = ('categorical', [5, 10, 20, 50])
hparam_space['bayesian_hidden_dim1'] = ('categorical', [32, 64, 128, 256])
hparam_space['bayesian_hidden_dim2'] = ('categorical', [32, 64, 128, 256])
hparam_space['step_length'] = ('log', (-4, -1))
hparam_space['burn_in'] = ('categorical', [500, 1000, 2000, 5000])
return hparam_space
def test_hparam_registry():
algorithms = ['ERM', 'GLMNet', 'BayesianNN']
datasets = ['RobCifar10', 'RobCifar100', 'RobImageNet']
architectures = ['resnet', 'wideresnet', 'densenet', 'alexnet', 'cnn']
for algorithm in algorithms:
for dataset in datasets:
print(f"\nTesting: Algorithm={algorithm}, Dataset={dataset}")
# Get default hyperparameters
default_hparam = default_hparams(algorithm, dataset)
print("\nDefault hyperparameters:")
for hparam, value in default_hparam.items():
print(f" {hparam}: {value}")
# Get random hyperparameters
random_hparam = random_hparams(algorithm, dataset, seed=42)
print("\nRandom hyperparameters:")
for hparam, value in random_hparam.items():
print(f" {hparam}: {value}")
# Get hyperparameter space
hparam_space = get_hparam_space(algorithm)
print("\nHyperparameter space:")
for hparam, (htype, hrange) in hparam_space.items():
print(f" {hparam}: type={htype}, range={hrange}")
print("\n" + "="*50)
if __name__ == "__main__":
test_hparam_registry()
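The registry's `(default, random)` convention can be exercised without the rest of the codebase. A small sketch of the sampling rules (the space below is a subset of the real ERM space; `'log'` entries are base-10 exponents):

```python
import numpy as np

# Subset of the ERM search space; 'log' ranges are base-10 exponents.
space = {
    'lr': ('log', (-6, -2)),
    'momentum': ('float', (0.5, 0.999)),
    'batch_size': ('categorical', [16, 32, 64, 128]),
}
rng = np.random.RandomState(42)
hparams = {}
for name, (htype, rv) in space.items():
    if htype == 'log':
        default, rand = 10 ** (sum(rv) / 2), 10 ** rng.uniform(*rv)
    elif htype == 'float':
        default, rand = sum(rv) / 2, rng.uniform(*rv)
    else:  # categorical
        default, rand = rv[0], rng.choice(rv)
    hparams[name] = (default, rand)

print(hparams['lr'][0])          # 0.0001 (midpoint of the exponent range)
print(hparams['batch_size'][0])  # 16 (first categorical value)
```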
================================================
FILE: transopt/benchmark/HPO/image_options.py
================================================
from PIL import Image, ImageEnhance, ImageOps
import random
class ShearX(object):
def __init__(self, fillcolor=(128, 128, 128)):
self.fillcolor = fillcolor
def __call__(self, x, magnitude):
return x.transform(
x.size, Image.AFFINE, (1, magnitude * random.choice([-1, 1]), 0, 0, 1, 0),
Image.BICUBIC, fillcolor=self.fillcolor)
class ShearY(object):
def __init__(self, fillcolor=(128, 128, 128)):
self.fillcolor = fillcolor
def __call__(self, x, magnitude):
return x.transform(
x.size, Image.AFFINE, (1, 0, 0, magnitude * random.choice([-1, 1]), 1, 0),
Image.BICUBIC, fillcolor=self.fillcolor)
class TranslateX(object):
def __init__(self, fillcolor=(128, 128, 128)):
self.fillcolor = fillcolor
def __call__(self, x, magnitude):
return x.transform(
x.size, Image.AFFINE, (1, 0, magnitude * x.size[0] * random.choice([-1, 1]), 0, 1, 0),
fillcolor=self.fillcolor)
class TranslateY(object):
def __init__(self, fillcolor=(128, 128, 128)):
self.fillcolor = fillcolor
def __call__(self, x, magnitude):
return x.transform(
x.size, Image.AFFINE, (1, 0, 0, 0, 1, magnitude * x.size[1] * random.choice([-1, 1])),
fillcolor=self.fillcolor)
class Rotate(object):
# from https://stackoverflow.com/questions/
# 5252170/specify-image-filling-color-when-rotating-in-python-with-pil-and-setting-expand
def __call__(self, x, magnitude):
rot = x.convert("RGBA").rotate(magnitude * random.choice([-1, 1]))
return Image.composite(rot, Image.new("RGBA", rot.size, (128,) * 4), rot).convert(x.mode)
class Color(object):
def __call__(self, x, magnitude):
return ImageEnhance.Color(x).enhance(1 + magnitude * random.choice([-1, 1]))
class Posterize(object):
def __call__(self, x, magnitude):
return ImageOps.posterize(x, magnitude)
class Solarize(object):
def __call__(self, x, magnitude):
return ImageOps.solarize(x, magnitude)
class Contrast(object):
def __call__(self, x, magnitude):
return ImageEnhance.Contrast(x).enhance(1 + magnitude * random.choice([-1, 1]))
class Sharpness(object):
def __call__(self, x, magnitude):
return ImageEnhance.Sharpness(x).enhance(1 + magnitude * random.choice([-1, 1]))
class Brightness(object):
def __call__(self, x, magnitude):
return ImageEnhance.Brightness(x).enhance(1 + magnitude * random.choice([-1, 1]))
class AutoContrast(object):
def __call__(self, x, magnitude):
return ImageOps.autocontrast(x)
class Equalize(object):
def __call__(self, x, magnitude):
return ImageOps.equalize(x)
class Invert(object):
def __call__(self, x, magnitude):
return ImageOps.invert(x)
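Several of the operations above (`Color`, `Contrast`, `Sharpness`, `Brightness`) share one convention: the enhancement factor is `1 + magnitude * random_sign`, so magnitude 0 is the identity and magnitude m perturbs the image symmetrically in either direction. A Pillow-free sketch of that factor rule:

```python
import random

def enhancement_factor(magnitude):
    # Same factor rule as Color/Contrast/Sharpness/Brightness above:
    # magnitude 0 leaves the image unchanged; magnitude m scales the
    # enhancement up or down by m with equal probability.
    return 1 + magnitude * random.choice([-1, 1])

random.seed(0)
factors = [enhancement_factor(0.3) for _ in range(6)]
# Every sampled factor is either 0.7 or 1.3.
print(sorted(set(factors)))
```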
================================================
FILE: transopt/benchmark/HPO/misc.py
================================================
import math
import hashlib
import sys
from collections import OrderedDict
from numbers import Number
import operator
import numpy as np
import torch
from collections import Counter
from itertools import cycle
import matplotlib.pyplot as plt
class _SplitDataset(torch.utils.data.Dataset):
"""Used by split_dataset"""
def __init__(self, underlying_dataset, keys):
super(_SplitDataset, self).__init__()
self.underlying_dataset = underlying_dataset
self.keys = keys
def __getitem__(self, key):
return self.underlying_dataset[self.keys[key]]
def __len__(self):
return len(self.keys)
def split_dataset(dataset, n, seed=0):
"""
Return a pair of datasets corresponding to a random split of the given
dataset, with n datapoints in the first dataset and the rest in the last,
using the given random seed
"""
assert(n <= len(dataset))
keys = list(range(len(dataset)))
np.random.RandomState(seed).shuffle(keys)
keys_1 = keys[:n]
keys_2 = keys[n:]
return _SplitDataset(dataset, keys_1), _SplitDataset(dataset, keys_2)
def accuracy(network, loader, device):
correct = 0
total = 0
network.eval()
with torch.no_grad():
for x, y in loader:
x = x.to(device)
y = y.to(device)
p = network.predict(x)
if p.size(1) == 1:
correct += (p.gt(0).eq(y).float()).sum().item()
else:
correct += (p.argmax(1).eq(y).float()).sum().item()
total += len(x)
network.train()
return correct / total
def print_row(row, colwidth=10, latex=False):
if latex:
sep = " & "
end_ = "\\\\"
else:
sep = " "
end_ = ""
def format_val(x):
if np.issubdtype(type(x), np.floating):
x = "{:.10f}".format(x)
return str(x).ljust(colwidth)[:colwidth]
print(sep.join([format_val(x) for x in row]), end_)
class LossPlotter:
def __init__(self):
self.classification_losses = []  # stores the classification losses
self.reconstruction_losses = []  # stores the reconstruction losses
self.epochs = []  # stores the running epoch count
self.cur = 0
# Initialize the plot
plt.ion()  # enable interactive mode
self.fig, self.ax = plt.subplots(figsize=(10, 5))
def update(self, classification_loss, reconstruction_loss):
# Record the new losses and epoch count
self.cur += 1
self.classification_losses.append(classification_loss)
self.reconstruction_losses.append(reconstruction_loss)
self.epochs.append(self.cur)
# Clear the current figure
self.ax.clear()
# Plot the classification loss curve
self.ax.plot(self.epochs, self.classification_losses, label='Classification Loss', color='blue', marker='o')
# Plot the reconstruction loss curve
self.ax.plot(self.epochs, self.reconstruction_losses, label='Reconstruction Loss', color='orange', marker='x')
# Set the title and axis labels
self.ax.set_title('Loss Curves')
self.ax.set_xlabel('Epoch')
self.ax.set_ylabel('Loss')
# Show the legend
self.ax.legend()
# Redraw the figure
plt.draw()
plt.pause(0.01)  # brief pause so the figure refreshes
def show(self):
# Save the final figure and turn off interactive mode
plt.ioff()
plt.savefig('loss_curves.png')
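`split_dataset` above does its work purely on index keys: it shuffles `range(len(dataset))` with a seeded `RandomState`, then wraps the first n keys and the remainder in `_SplitDataset` views. The key logic in isolation, with plain lists standing in for the torch datasets:

```python
import numpy as np

# The index-shuffling core of split_dataset; the same seed always
# reproduces the same split.
data = list(range(10))
keys = list(range(len(data)))
np.random.RandomState(0).shuffle(keys)
keys_1, keys_2 = keys[:3], keys[3:]

split_a = [data[k] for k in keys_1]  # what _SplitDataset(dataset, keys_1) indexes
split_b = [data[k] for k in keys_2]
# Together the two views cover the dataset exactly once.
print(len(split_a), len(split_b))  # 3 7
```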
================================================
FILE: transopt/benchmark/HPO/networks.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import copy
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
import torchvision.models
SUPPORTED_ARCHITECTURES = {
'resnet': [18, 34, 50, 101],
'densenet': [121, 169, 201],
'wideresnet': [16, 22, 28, 40],
'alexnet': [1],
'cnn': [1]
}
def Featurizer(input_shape, architecture, model_size, hparams):
"""Select an appropriate featurizer based on the input shape and hparams."""
if architecture == 'densenet':
return DenseNet(input_shape, model_size, hparams)
elif architecture == 'resnet':
return ResNet(input_shape, model_size, hparams)
elif architecture == 'wideresnet':
return WideResNet(input_shape, model_size, hparams)
elif architecture == 'alexnet':
return AlexNet(input_shape, hparams)
elif architecture == 'cnn':
return CNN(input_shape, hparams)
else:
raise ValueError(f"Unsupported network architecture: {architecture}")
class Identity(nn.Module):
"""An identity layer"""
def __init__(self):
super(Identity, self).__init__()
def forward(self, x):
return x
class MLP(nn.Module):
"""Just an MLP"""
def __init__(self, n_inputs, n_outputs, hparams):
super(MLP, self).__init__()
self.input = nn.Linear(n_inputs, hparams['mlp_width'])
self.dropout = nn.Dropout(hparams['dropout_rate'])
self.hiddens = nn.ModuleList([
nn.Linear(hparams['mlp_width'], hparams['mlp_width'])
for _ in range(hparams['mlp_depth']-2)])
self.output = nn.Linear(hparams['mlp_width'], n_outputs)
self.n_outputs = n_outputs
def forward(self, x):
x = self.input(x)
x = self.dropout(x)
x = F.relu(x)
for hidden in self.hiddens:
x = hidden(x)
x = self.dropout(x)
x = F.relu(x)
# x = self.output(x)
return x
class ResNet(torch.nn.Module):
"""ResNet with the softmax chopped off and the batchnorm frozen"""
def __init__(self, input_shape, model_size, hparams):
super(ResNet, self).__init__()
if model_size == 18:
self.network = torchvision.models.resnet18(weights=torchvision.models.ResNet18_Weights.IMAGENET1K_V1)
self.n_outputs = 512
elif model_size == 101:
self.network = torchvision.models.resnet101(weights=torchvision.models.ResNet101_Weights.IMAGENET1K_V2)
self.n_outputs = 2048
elif model_size == 34:
self.network = torchvision.models.resnet34(weights=torchvision.models.ResNet34_Weights.IMAGENET1K_V1)
self.n_outputs = 512
elif model_size == 50:
self.network = torchvision.models.resnet50(weights=torchvision.models.ResNet50_Weights.IMAGENET1K_V2)
self.n_outputs = 2048
else:
raise ValueError(f"Unsupported ResNet model size: {model_size}")
# adapt number of channels
nc = input_shape[0]
if nc != 3:
tmp = self.network.conv1.weight.data.clone()
self.network.conv1 = nn.Conv2d(
nc, 64, kernel_size=(7, 7),
stride=(2, 2), padding=(3, 3), bias=False)
for i in range(nc):
self.network.conv1.weight.data[:, i, :, :] = tmp[:, i % 3, :, :]
# save memory
del self.network.fc
self.network.fc = Identity()
self.freeze_bn()
self.hparams = hparams
def forward(self, x):
"""Encode x into a feature vector of size n_outputs."""
return self.network(x)
def train(self, mode=True):
"""
Override the default train() to freeze the BN parameters
"""
super().train(mode)
self.freeze_bn()
def freeze_bn(self):
for m in self.network.modules():
if isinstance(m, nn.BatchNorm2d):
m.eval()
def conv3x3(in_planes, out_planes, stride=1):
return nn.Conv2d(
in_planes,
out_planes,
kernel_size=3,
stride=stride,
padding=1,
bias=True)
def conv_init(m):
classname = m.__class__.__name__
if classname.find('Conv') != -1:
init.xavier_uniform_(m.weight, gain=np.sqrt(2))
init.constant_(m.bias, 0)
elif classname.find('BatchNorm') != -1:
init.constant_(m.weight, 1)
init.constant_(m.bias, 0)
class wide_basic(nn.Module):
def __init__(self, in_planes, planes, dropout_rate, stride=1):
super(wide_basic, self).__init__()
self.bn1 = nn.BatchNorm2d(in_planes)
self.conv1 = nn.Conv2d(
in_planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
self.dropout = nn.Dropout(p=dropout_rate)
self.bn2 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(
planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
self.shortcut = nn.Sequential()
if stride != 1 or in_planes != planes:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, padding=0, bias=True))
def forward(self, x):
out = self.dropout(self.conv1(F.relu(self.bn1(x))))
out = self.conv2(F.relu(self.bn2(out)))
out += self.shortcut(x)
return out
class WideResNet(nn.Module):
"""WideResNet with the softmax layer removed"""
def __init__(self, input_shape, model_size, hparams):
super(WideResNet, self).__init__()
# Define configurations for different model sizes
configs = {
28: (28, 10), # WRN-28-10
16: (16, 8), # WRN-16-8
40: (40, 2), # WRN-40-2
22: (22, 2) # WRN-22-2
}
if model_size not in configs:
raise ValueError(f"Unsupported model size: {model_size}. Choose from {list(configs.keys())}")
self.depth, self.widen_factor = configs[model_size]
self.nChannels = [16, 16*self.widen_factor, 32*self.widen_factor, 64*self.widen_factor]
self.in_planes = 16
assert ((self.depth-4) % 6 == 0), 'Wide-resnet depth should be 6n+4'
n = (self.depth-4) // 6
self.n_outputs = self.nChannels[3]
self.dropout = hparams['dropout_rate']
self.conv1 = nn.Conv2d(input_shape[0], self.nChannels[0], kernel_size=3, stride=1, padding=1, bias=False)
self.layer1 = self._wide_layer(wide_basic, self.nChannels[1], n, self.dropout, stride=1)
self.layer2 = self._wide_layer(wide_basic, self.nChannels[2], n, self.dropout, stride=2)
self.layer3 = self._wide_layer(wide_basic, self.nChannels[3], n, self.dropout, stride=2)
self.bn1 = nn.BatchNorm2d(self.nChannels[3], momentum=0.9)
def _wide_layer(self, block, planes, num_blocks, dropout, stride):
strides = [stride] + [1]*(num_blocks-1)
layers = []
for stride in strides:
layers.append(block(self.in_planes, planes, dropout, stride))
self.in_planes = planes
return nn.Sequential(*layers)
def forward(self, x):
out = self.conv1(x)
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = F.relu(self.bn1(out))
out = F.avg_pool2d(out, 8)
out = out.view(out.size(0), -1)
return out
class DenseNet(nn.Module):
"""DenseNet with the softmax layer removed"""
def __init__(self, input_shape, model_size, hparams):
super(DenseNet, self).__init__()
self.model_size = model_size
if self.model_size == 121:
self.network = torchvision.models.densenet121(weights=torchvision.models.DenseNet121_Weights.IMAGENET1K_V1)
self.n_outputs = 1024
elif self.model_size == 169:
self.network = torchvision.models.densenet169(weights=torchvision.models.DenseNet169_Weights.IMAGENET1K_V1)
self.n_outputs = 1664
elif self.model_size == 201:
self.network = torchvision.models.densenet201(weights=torchvision.models.DenseNet201_Weights.IMAGENET1K_V1)
self.n_outputs = 1920
else:
raise ValueError("Unsupported DenseNet depth. Choose from 121, 169, or 201.")
# Adapt number of channels
nc = input_shape[0]
if nc != 3:
self.network.features.conv0 = nn.Conv2d(nc, 64, kernel_size=7, stride=2, padding=3, bias=False)
# Remove the last fully connected layer
self.network.classifier = Identity()
# self.dropout = nn.Dropout(hparams['dropout_rate'])
def forward(self, x):
features = self.network(x)
return features
class ht_CNN(nn.Module):
"""
Hand-tuned architecture for MNIST.
Weirdness I've noticed so far with this architecture:
- adding a linear layer after the mean-pool in features hurts
RotatedMNIST-100 generalization severely.
"""
n_outputs = 128
def __init__(self, input_shape):
super(ht_CNN, self).__init__()
self.conv1 = nn.Conv2d(input_shape[0], 64, 3, 1, padding=1)
self.conv2 = nn.Conv2d(64, 128, 3, stride=2, padding=1)
self.conv3 = nn.Conv2d(128, 128, 3, 1, padding=1)
self.conv4 = nn.Conv2d(128, 128, 3, 1, padding=1)
self.bn0 = nn.GroupNorm(8, 64)
self.bn1 = nn.GroupNorm(8, 128)
self.bn2 = nn.GroupNorm(8, 128)
self.bn3 = nn.GroupNorm(8, 128)
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
def forward(self, x):
x = self.conv1(x)
x = F.relu(x)
x = self.bn0(x)
x = self.conv2(x)
x = F.relu(x)
x = self.bn1(x)
x = self.conv3(x)
x = F.relu(x)
x = self.bn2(x)
x = self.conv4(x)
x = F.relu(x)
x = self.bn3(x)
x = self.avgpool(x)
x = x.view(len(x), -1)
return x
class CNN(nn.Module):
"""
Two-layer CNN with hidden dimensions determined by hparams.
"""
def __init__(self, input_shape, hparams):
super(CNN, self).__init__()
self.conv1 = nn.Conv2d(input_shape[0], hparams['hidden_dim1'], 3, 1, padding=1)
self.conv2 = nn.Conv2d(hparams['hidden_dim1'], hparams['hidden_dim2'], 3, 1, padding=1)
self.bn1 = nn.BatchNorm2d(hparams['hidden_dim1'])
self.bn2 = nn.BatchNorm2d(hparams['hidden_dim2'])
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
self.n_outputs = hparams['hidden_dim2']
def forward(self, x):
x = self.conv1(x)
x = F.relu(x)
x = self.bn1(x)
x = self.conv2(x)
x = F.relu(x)
x = self.bn2(x)
x = self.avgpool(x)
x = x.view(len(x), -1)
return x
class ContextNet(nn.Module):
def __init__(self, input_shape):
super(ContextNet, self).__init__()
# Keep same dimensions
padding = (5 - 1) // 2
self.context_net = nn.Sequential(
nn.Conv2d(input_shape[0], 64, 5, padding=padding),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.Conv2d(64, 64, 5, padding=padding),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.Conv2d(64, 1, 5, padding=padding),
)
def forward(self, x):
return self.context_net(x)
class AlexNet(nn.Module):
"""AlexNet with the classifier layer removed"""
def __init__(self, input_shape, hparams):
super(AlexNet, self).__init__()
self.input_shape = input_shape
self.hparams = hparams
self.features = nn.Sequential(
nn.Conv2d(input_shape[0], 64, kernel_size=3, stride=2, padding=1),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=2),
nn.Conv2d(64, 192, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=2),
nn.Conv2d(192, 384, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.Conv2d(384, 256, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.Conv2d(256, 256, kernel_size=3, padding=1),
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=2),
)
self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
# Calculate the correct n_outputs
with torch.no_grad():
dummy_input = torch.zeros(1, *input_shape)
features_output = self.features(dummy_input)
avgpool_output = self.avgpool(features_output)
self.n_outputs = avgpool_output.view(avgpool_output.size(0), -1).shape[1]
def forward(self, x):
x = self.features(x)
x = self.avgpool(x)
x = x.view(x.size(0), -1)
return x
def Classifier(in_features, out_features, dropout=0.5, is_nonlinear=False):
if is_nonlinear:
hidden1 = max(in_features // 2, 64) # Ensure at least 64 neurons
hidden2 = max(hidden1 // 2, 32) # Ensure at least 32 neurons
return torch.nn.Sequential(
torch.nn.Dropout(p=dropout),
torch.nn.Linear(in_features, hidden1),
torch.nn.ReLU(),
torch.nn.Dropout(p=dropout),
torch.nn.Linear(hidden1, hidden2),
torch.nn.ReLU(),
torch.nn.Linear(hidden2, out_features)
)
else:
return torch.nn.Linear(in_features, out_features)
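The nonlinear `Classifier` head above sizes its hidden layers from the featurizer's output width with simple floor rules (`in_features // 2` but at least 64, then half again but at least 32). That sizing rule in isolation:

```python
def classifier_hidden_dims(in_features):
    # Hidden-layer sizes used by the nonlinear Classifier head.
    hidden1 = max(in_features // 2, 64)  # at least 64 neurons
    hidden2 = max(hidden1 // 2, 32)      # at least 32 neurons
    return hidden1, hidden2

print(classifier_hidden_dims(512))  # (256, 128), e.g. ResNet-18 features
print(classifier_hidden_dims(64))   # (64, 32): the floors kick in
```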
================================================
FILE: transopt/benchmark/HPO/test_model.py
================================================
import os
import torch
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.transforms import ToPILImage
import matplotlib.pyplot as plt
from transopt.benchmark.HPO import algorithms
from transopt.benchmark.HPO import datasets
# Path to the saved model checkpoint
model_path = os.path.expanduser('~/transopt_tmp/output/models/ROBERM_RobCifar10_0/model.pkl')
algorithm_name = 'ROBERM'
dataset_name = 'RobCifar10'
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
hparams = {
'batch_size': 64,
'nonlinear_classifier': False,
'lr': 0.001,
'weight_decay': 0.00001
}
dataset = vars(datasets)[dataset_name]()
algorithm_class = algorithms.get_algorithm_class(algorithm_name)
algorithm = algorithm_class(dataset.input_shape, dataset.num_classes, len(dataset), hparams)
# Load the model
checkpoint = torch.load(model_path, map_location=device)  # load the checkpoint onto the target device
algorithm.load_state_dict(checkpoint['model_dict'])
algorithm.to(device)
algorithm.eval()  # switch to evaluation mode
# Data transform (convert images to tensors)
transform = transforms.Compose([
transforms.ToTensor(),  # convert to a tensor
])
# Load the CIFAR-10 test dataset
test_dataset = dataset.test
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=True)
# Utility for converting tensors back to PIL images
to_pil_image = ToPILImage()
# Pick one test sample and run a prediction
for images, labels in test_loader:
images = images.to(device)
labels = labels.to(device)
with torch.no_grad():  # disable gradient computation
outputs, reconstructed_images = algorithm.predict(images)  # predict is expected to also return the decoded images
_, predicted = torch.max(outputs, 1)
# Print the prediction
print(f"Predicted Label: {predicted.item()}")
# Convert the original and reconstructed images back to PIL images
original_image_pil = to_pil_image(images.squeeze(0).cpu())  # original image
reconstructed_image_pil = to_pil_image(reconstructed_images.squeeze(0).cpu())  # reconstruction
# Display the original and the reconstruction side by side with matplotlib
fig, axes = plt.subplots(1, 2, figsize=(10, 5))
# Original image
axes[0].imshow(original_image_pil)
axes[0].set_title('Original Image')
axes[0].axis('off')  # hide the axes
# Reconstructed image
axes[1].imshow(reconstructed_image_pil)
axes[1].set_title(f'Reconstructed Image\nPredicted Label: {predicted.item()}')
axes[1].axis('off')  # hide the axes
plt.tight_layout()
plt.savefig('rec.png')
# Only process the first test sample
break
================================================
FILE: transopt/benchmark/HPO/visualization.py
================================================
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from torchvision import datasets, transforms
from torchvision.transforms import AutoAugmentPolicy, AutoAugment, RandAugment
import torch
def get_cifar10_data(transform):
dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=1000, shuffle=True)
images, labels = next(iter(dataloader))
return images.numpy().reshape(1000, -1), labels.numpy()
# Define transforms
transforms_list = {
'No Augmentation': transforms.ToTensor(),
'Random Crop': transforms.Compose([
transforms.RandomCrop(32, padding=4),
transforms.ToTensor(),
]),
'Random Horizontal Flip': transforms.Compose([
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
]),
'Color Jitter': transforms.Compose([
transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
transforms.ToTensor(),
]),
'Brightness': transforms.Compose([
transforms.ColorJitter(brightness=0.5),
transforms.ToTensor(),
]),
'Solarize': transforms.Compose([
transforms.RandomSolarize(threshold=128),
transforms.ToTensor(),
]),
'Shear': transforms.Compose([
transforms.RandomAffine(degrees=0, shear=15),
transforms.ToTensor(),
]),
}
# Prepare data for all transforms
all_data = []
all_labels = []
for name, transform in transforms_list.items():
print(f"Processing {name}...")
data, labels = get_cifar10_data(transform)
all_data.append(data)
all_labels.append(np.full(labels.shape, list(transforms_list.keys()).index(name)))
# Combine all data
combined_data = np.vstack(all_data)
combined_labels = np.hstack(all_labels)
# Perform t-SNE on combined data
tsne = TSNE(n_components=2, random_state=42)
tsne_results = tsne.fit_transform(combined_data)
# Visualize results
plt.figure(figsize=(16, 16))
scatter = plt.scatter(tsne_results[:, 0], tsne_results[:, 1], c=combined_labels, cmap='tab10')
plt.title('t-SNE Visualization of CIFAR-10 with Different Augmentations')
plt.xlabel('t-SNE feature 1')
plt.ylabel('t-SNE feature 2')
# Add legend
legend_elements = [plt.Line2D([0], [0], marker='o', color='w', label=method,
markerfacecolor=plt.cm.tab10(i/len(transforms_list)), markersize=10)
for i, method in enumerate(transforms_list.keys())]
plt.legend(handles=legend_elements, title='Augmentation Methods', loc='center left', bbox_to_anchor=(1, 0.5))
plt.tight_layout()
plt.savefig('cifar10_augmentations_tsne.png', dpi=300, bbox_inches='tight')
plt.show()
print("Visualization complete. Check the output image: cifar10_augmentations_tsne.png")
================================================
FILE: transopt/benchmark/HPO/wide_resnet.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
"""
From https://github.com/meliketoy/wide-resnet.pytorch
"""
import sys
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
def conv3x3(in_planes, out_planes, stride=1):
return nn.Conv2d(
in_planes,
out_planes,
kernel_size=3,
stride=stride,
padding=1,
bias=True)
def conv_init(m):
classname = m.__class__.__name__
if classname.find('Conv') != -1:
init.xavier_uniform_(m.weight, gain=np.sqrt(2))
init.constant_(m.bias, 0)
elif classname.find('BatchNorm') != -1:
init.constant_(m.weight, 1)
init.constant_(m.bias, 0)
class wide_basic(nn.Module):
def __init__(self, in_planes, planes, dropout_rate, stride=1):
super(wide_basic, self).__init__()
self.bn1 = nn.BatchNorm2d(in_planes)
self.conv1 = nn.Conv2d(
in_planes, planes, kernel_size=3, padding=1, bias=True)
self.dropout = nn.Dropout(p=dropout_rate)
self.bn2 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(
planes, planes, kernel_size=3, stride=stride, padding=1, bias=True)
self.shortcut = nn.Sequential()
if stride != 1 or in_planes != planes:
self.shortcut = nn.Sequential(
nn.Conv2d(
in_planes, planes, kernel_size=1, stride=stride,
bias=True), )
def forward(self, x):
out = self.dropout(self.conv1(F.relu(self.bn1(x))))
out = self.conv2(F.relu(self.bn2(out)))
out += self.shortcut(x)
return out
class Wide_ResNet(nn.Module):
"""Wide Resnet with the softmax layer chopped off"""
def __init__(self, input_shape, depth, widen_factor, dropout_rate):
super(Wide_ResNet, self).__init__()
self.in_planes = 16
assert ((depth - 4) % 6 == 0), 'Wide-resnet depth should be 6n+4'
        n = (depth - 4) // 6  # blocks per group; integer since depth = 6n + 4
k = widen_factor
# print('| Wide-Resnet %dx%d' % (depth, k))
nStages = [16, 16 * k, 32 * k, 64 * k]
self.conv1 = conv3x3(input_shape[0], nStages[0])
self.layer1 = self._wide_layer(
wide_basic, nStages[1], n, dropout_rate, stride=1)
self.layer2 = self._wide_layer(
wide_basic, nStages[2], n, dropout_rate, stride=2)
self.layer3 = self._wide_layer(
wide_basic, nStages[3], n, dropout_rate, stride=2)
self.bn1 = nn.BatchNorm2d(nStages[3], momentum=0.9)
self.n_outputs = nStages[3]
def _wide_layer(self, block, planes, num_blocks, dropout_rate, stride):
strides = [stride] + [1] * (int(num_blocks) - 1)
layers = []
for stride in strides:
layers.append(block(self.in_planes, planes, dropout_rate, stride))
self.in_planes = planes
return nn.Sequential(*layers)
def forward(self, x):
out = self.conv1(x)
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = F.relu(self.bn1(out))
out = F.avg_pool2d(out, 8)
return out[:, :, 0, 0]
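A quick sanity check of the depth/width arithmetic in `Wide_ResNet.__init__`: the depth must be `6n + 4`, and the final stage (and thus `n_outputs`) is `64 * widen_factor`. This is a standalone sketch, not part of the repo:

```python
def wrn_config(depth, k):
    # Mirror the arithmetic in Wide_ResNet.__init__ above
    assert (depth - 4) % 6 == 0, 'Wide-resnet depth should be 6n+4'
    n = (depth - 4) // 6               # blocks per group
    stages = [16, 16 * k, 32 * k, 64 * k]
    return n, stages

n, stages = wrn_config(28, 10)         # the classic WRN-28-10
```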
================================================
FILE: transopt/benchmark/HPOB/HpobBench.py
================================================
import copy
import numpy as np
import json
import os
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist
os.environ['OMP_NUM_THREADS'] = "1"
class HPOb():
def __init__(self, search_space_id, data_set_id, xdim, path='./Benchmark/HPOB/hpob-data'):
self.name = f'HPOb_{xdim}d_{data_set_id}'
self.search_space_id = search_space_id
self.data_set_id = data_set_id
self.xdim = xdim
self.query_num = 0
self.task_type = 'Tabular'
with open(path + "/meta-test-dataset.json", "r") as f:
data_set = json.load(f)
data_set = data_set[search_space_id][data_set_id]
self.data_set = data_set
        self.RX = np.array([[0, 1] for _ in range(xdim)])  # array so transfer/normalize can slice columns
self.bounds = np.array([[-1.0] * self.xdim, [1.0] * self.xdim])
self.unobserved_indexs = list(range(len(data_set['y'])))
self.observed_indexs = []
self.data_input = {index: value for index, value in enumerate(data_set['X'])}
        self.data_output = {index: value for index, value in enumerate(data_set['y'])}
self.unobserved_input = {index: value for index, value in enumerate(data_set['X'])}
self.unobserved_output = {index: value for index, value in enumerate(data_set['y'])}
def transfer(self, X):
return (X + 1) * (self.RX[:, 1] - self.RX[:, 0]) / 2 + (self.RX[:, 0])
def normalize(self, X):
return 2 * (X - (self.RX[:, 0])) / (self.RX[:, 1] - self.RX[:, 0]) - 1
def data_num(self):
return len(self.unobserved_output)
def get_var(self, indexs):
X = [self.unobserved_input[idx] for idx in indexs]
return np.array(X)
def get_idx(self, vars):
unob_idx = []
vars = np.array(vars)
for var in vars:
for idx in self.unobserved_indexs:
if np.all(var == self.unobserved_input[idx]):
unob_idx.append(idx)
return unob_idx
def get_all_unobserved_var(self):
return np.array(list(self.unobserved_input.values()))
def get_all_unobserved_idxs(self):
return self.unobserved_indexs
    def f(self, X, indexs):
self.query_num += len(indexs)
y = []
for idx in indexs:
y.append(self.unobserved_output[idx][0])
del self.unobserved_output[idx]
del self.unobserved_input[idx]
self.unobserved_indexs.remove(idx)
self.observed_indexs.append(idx)
f = np.array(y)
return f
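The `[0, 1]` to `[-1, 1]` mapping used by `normalize` and `transfer` above should round-trip exactly; a minimal self-contained check, with `RX` held as a NumPy array of per-dimension bounds:

```python
import numpy as np

RX = np.array([[0.0, 1.0]] * 3)  # per-dimension [lower, upper] bounds

def normalize(X):
    # [lower, upper] -> [-1, 1], as in HPOb.normalize
    return 2 * (X - RX[:, 0]) / (RX[:, 1] - RX[:, 0]) - 1

def transfer(X):
    # [-1, 1] -> [lower, upper], as in HPOb.transfer
    return (X + 1) * (RX[:, 1] - RX[:, 0]) / 2 + RX[:, 0]

x = np.array([0.0, 0.5, 1.0])
assert np.allclose(transfer(normalize(x)), x)
```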
dataset_dic = {'4796': ['3549', '3918', '9903', '23'],
'5527': ['146064', '146065', '9914', '145804', '31', '10101'],
'5636': ['146064', '145804', '9914', '146065', '10101', '31'],
'5859': ['9983', '31', '37', '3902', '9977', '125923'], '5860': ['14965', '9976', '3493'],
'5891': ['9889', '3899', '6566', '9980', '3891', '3492'], '5906': ['9971', '3918'],
'5965': ['145836', '9914', '3903', '10101', '9889', '49', '9946'],
'5970': ['37', '3492', '9952', '49', '34536', '14951'],
'5971': ['10093', '3954', '43', '34536', '9970', '6566'],
'6766': ['3903', '146064', '145953', '145804', '31', '10101'],
'6767': ['146065', '145804', '146064', '9914', '9967', '31'],
'6794': ['145804', '3', '146065', '10101', '9914', '31'],
'7607': ['14965', '145976', '3896', '3913', '3903', '9946', '9967'],
'7609': ['145854', '3903', '9967', '145853', '34537', '125923', '145878'],
'5889': ['9971', '3918']}
def calculate_correlation(x1, y1, X2, Y2):
    # Compute the Euclidean distance from x1 to every row of X2
    distances = cdist(x1.reshape(1, -1), X2, metric='euclidean')
    # Index of the nearest point in X2
    closest_index = np.argmin(distances)
    # Pair-wise correlation between y1 and the outputs at the nearest point
    correlation = np.corrcoef(y1, Y2[closest_index].flatten())[0, 1]
    return correlation
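`calculate_correlation` relies on `scipy.spatial.distance.cdist`; the same nearest-neighbour correlation can be sketched with NumPy alone (a hedged stand-in, not the repo's implementation):

```python
import numpy as np

def nearest_neighbor_correlation(x1, y1, X2, Y2):
    # Euclidean distance from x1 to every row of X2, without scipy's cdist
    d = np.linalg.norm(X2 - x1.reshape(1, -1), axis=1)
    closest = np.argmin(d)
    # Correlate y1 with the outputs recorded at the nearest point
    return np.corrcoef(y1, Y2[closest].flatten())[0, 1]
```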
if __name__ == '__main__':
search_space_id = '6794'
for data_set_id in dataset_dic[search_space_id]:
hpo = HPOb(search_space_id=search_space_id, data_set_id=data_set_id, xdim=10,path='./hpob-data')
data_x = np.array(hpo.data_set['X'])
data_y = hpo.data_set['y']
        # Sort by y and get the sorted indices
        sorted_indices = np.argsort(data_y, axis=0)
        # Reorder X according to the sorted indices
        sorted_X = data_x[sorted_indices[:, 0]]
        # Plot a heatmap of the sorted inputs
        plt.figure(figsize=(8, 6))
        plt.imshow(sorted_X, aspect='auto', cmap='viridis')
        plt.colorbar()
        plt.title('Heatmap of Sorted X')
        plt.xlabel('Features')
        plt.ylabel('Samples (Sorted by Y)')
        plt.savefig(f'heatmap_sorted_X_{data_set_id}.png')
================================================
FILE: transopt/benchmark/HPOB/plot.py
================================================
import matplotlib.pyplot as plt
# Surrogate-model output values and how often each occurs
values = [0.62559, 0.31532, 0.22537]
counts = [1000, 50, 20]
colors = ['red', 'green', 'blue']
# Plot a bar chart of the value counts
plt.bar(x=values, height=counts, width=0.05, color=colors)
# Axis labels and title
plt.xlabel('Value')
plt.ylabel('Number of Values')
plt.title('Distribution of HPOBench-B Surrogate Model')
plt.savefig('toy.png')
================================================
FILE: transopt/benchmark/HPOOOD/algorithms.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.autograd as autograd
import copy
import numpy as np
from collections import OrderedDict
from transopt.benchmark.HPOOOD import networks
from transopt.benchmark.HPOOOD.misc import (
random_pairs_of_minibatches, split_meta_train_test, ParamDict,
MovingAverage, l2_between_dicts, proj, Nonparametric, SupConLossLambda
)
ALGORITHMS = [
'ERM',
'Fish',
'IRM',
'GroupDRO',
'Mixup',
'MLDG',
'CORAL',
'MMD',
'DANN',
'CDANN',
'MTL',
'SagNet',
'ARM',
'VREx',
'RSC',
'SD',
'ANDMask',
'SANDMask',
'IGA',
'SelfReg',
"Fishr",
'TRM',
'IB_ERM',
'IB_IRM',
'CAD',
'CondCAD',
'Transfer',
'CausIRL_CORAL',
'CausIRL_MMD',
'EQRM',
'RDM',
'ADRMX',
]
def get_algorithm_class(algorithm_name):
"""Return the algorithm class with the given name."""
if algorithm_name not in globals():
raise NotImplementedError("Algorithm not found: {}".format(algorithm_name))
return globals()[algorithm_name]
class Algorithm(torch.nn.Module):
"""
A subclass of Algorithm implements a domain generalization algorithm.
Subclasses should implement the following:
- update()
- predict()
"""
def __init__(self, input_shape, num_classes, num_domains, hparams):
super(Algorithm, self).__init__()
self.hparams = hparams
def update(self, minibatches, unlabeled=None):
"""
Perform one update step, given a list of (x, y) tuples for all
environments.
Admits an optional list of unlabeled minibatches from the test domains,
when task is domain_adaptation.
"""
raise NotImplementedError
def predict(self, x):
raise NotImplementedError
class ERM(Algorithm):
"""
Empirical Risk Minimization (ERM)
"""
def __init__(self, input_shape, num_classes, num_domains, hparams):
super(ERM, self).__init__(input_shape, num_classes, num_domains,
hparams)
self.featurizer = networks.Featurizer(input_shape, self.hparams)
self.classifier = networks.Classifier(
self.featurizer.n_outputs,
num_classes,
self.hparams['nonlinear_classifier'])
self.network = nn.Sequential(self.featurizer, self.classifier)
self.optimizer = torch.optim.Adam(
self.network.parameters(),
lr=self.hparams["lr"],
weight_decay=self.hparams['weight_decay'],
)
def update(self, minibatches, unlabeled=None):
all_x = torch.cat([x for x, y in minibatches])
all_y = torch.cat([y for x, y in minibatches])
loss = F.cross_entropy(self.predict(all_x), all_y)
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
return {'loss': loss.item()}
def predict(self, x):
return self.network(x)
class Fish(Algorithm):
"""
Implementation of Fish, as seen in Gradient Matching for Domain
Generalization, Shi et al. 2021.
"""
def __init__(self, input_shape, num_classes, num_domains, hparams):
super(Fish, self).__init__(input_shape, num_classes, num_domains,
hparams)
self.input_shape = input_shape
self.num_classes = num_classes
self.network = networks.WholeFish(input_shape, num_classes, hparams)
self.optimizer = torch.optim.Adam(
self.network.parameters(),
lr=self.hparams["lr"],
weight_decay=self.hparams['weight_decay']
)
self.optimizer_inner_state = None
def create_clone(self, device):
self.network_inner = networks.WholeFish(self.input_shape, self.num_classes, self.hparams,
weights=self.network.state_dict()).to(device)
self.optimizer_inner = torch.optim.Adam(
self.network_inner.parameters(),
lr=self.hparams["lr"],
weight_decay=self.hparams['weight_decay']
)
if self.optimizer_inner_state is not None:
self.optimizer_inner.load_state_dict(self.optimizer_inner_state)
def fish(self, meta_weights, inner_weights, lr_meta):
meta_weights = ParamDict(meta_weights)
inner_weights = ParamDict(inner_weights)
meta_weights += lr_meta * (inner_weights - meta_weights)
return meta_weights
def update(self, minibatches, unlabeled=None):
self.create_clone(minibatches[0][0].device)
for x, y in minibatches:
loss = F.cross_entropy(self.network_inner(x), y)
self.optimizer_inner.zero_grad()
loss.backward()
self.optimizer_inner.step()
self.optimizer_inner_state = self.optimizer_inner.state_dict()
meta_weights = self.fish(
meta_weights=self.network.state_dict(),
inner_weights=self.network_inner.state_dict(),
lr_meta=self.hparams["meta_lr"]
)
self.network.reset_weights(meta_weights)
return {'loss': loss.item()}
def predict(self, x):
return self.network(x)
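`Fish.fish` interpolates the meta weights toward the inner-loop weights: the update is just `meta + lr_meta * (inner - meta)` per parameter. A minimal sketch over plain dicts of floats (illustrative only):

```python
def fish_meta_step(meta, inner, lr_meta):
    # Reptile-style interpolation toward the inner-loop weights, as in Fish.fish
    return {k: meta[k] + lr_meta * (inner[k] - meta[k]) for k in meta}
```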
class ARM(ERM):
""" Adaptive Risk Minimization (ARM) """
def __init__(self, input_shape, num_classes, num_domains, hparams):
original_input_shape = input_shape
input_shape = (1 + original_input_shape[0],) + original_input_shape[1:]
super(ARM, self).__init__(input_shape, num_classes, num_domains,
hparams)
self.context_net = networks.ContextNet(original_input_shape)
self.support_size = hparams['batch_size']
def predict(self, x):
batch_size, c, h, w = x.shape
if batch_size % self.support_size == 0:
meta_batch_size = batch_size // self.support_size
support_size = self.support_size
else:
meta_batch_size, support_size = 1, batch_size
context = self.context_net(x)
context = context.reshape((meta_batch_size, support_size, 1, h, w))
context = context.mean(dim=1)
context = torch.repeat_interleave(context, repeats=support_size, dim=0)
x = torch.cat([x, context], dim=1)
return self.network(x)
class AbstractDANN(Algorithm):
"""Domain-Adversarial Neural Networks (abstract class)"""
def __init__(self, input_shape, num_classes, num_domains,
hparams, conditional, class_balance):
super(AbstractDANN, self).__init__(input_shape, num_classes, num_domains,
hparams)
self.register_buffer('update_count', torch.tensor([0]))
self.conditional = conditional
self.class_balance = class_balance
# Algorithms
self.featurizer = networks.Featurizer(input_shape, self.hparams)
self.classifier = networks.Classifier(
self.featurizer.n_outputs,
num_classes,
self.hparams['nonlinear_classifier'])
self.discriminator = networks.MLP(self.featurizer.n_outputs,
num_domains, self.hparams)
self.class_embeddings = nn.Embedding(num_classes,
self.featurizer.n_outputs)
# Optimizers
self.disc_opt = torch.optim.Adam(
(list(self.discriminator.parameters()) +
list(self.class_embeddings.parameters())),
lr=self.hparams["lr_d"],
weight_decay=self.hparams['weight_decay_d'],
betas=(self.hparams['beta1'], 0.9))
self.gen_opt = torch.optim.Adam(
(list(self.featurizer.parameters()) +
list(self.classifier.parameters())),
lr=self.hparams["lr_g"],
weight_decay=self.hparams['weight_decay_g'],
betas=(self.hparams['beta1'], 0.9))
def update(self, minibatches, unlabeled=None):
device = "cuda" if minibatches[0][0].is_cuda else "cpu"
self.update_count += 1
all_x = torch.cat([x for x, y in minibatches])
all_y = torch.cat([y for x, y in minibatches])
all_z = self.featurizer(all_x)
if self.conditional:
disc_input = all_z + self.class_embeddings(all_y)
else:
disc_input = all_z
disc_out = self.discriminator(disc_input)
disc_labels = torch.cat([
torch.full((x.shape[0], ), i, dtype=torch.int64, device=device)
for i, (x, y) in enumerate(minibatches)
])
if self.class_balance:
y_counts = F.one_hot(all_y).sum(dim=0)
weights = 1. / (y_counts[all_y] * y_counts.shape[0]).float()
disc_loss = F.cross_entropy(disc_out, disc_labels, reduction='none')
disc_loss = (weights * disc_loss).sum()
else:
disc_loss = F.cross_entropy(disc_out, disc_labels)
input_grad = autograd.grad(
F.cross_entropy(disc_out, disc_labels, reduction='sum'),
[disc_input], create_graph=True)[0]
grad_penalty = (input_grad**2).sum(dim=1).mean(dim=0)
disc_loss += self.hparams['grad_penalty'] * grad_penalty
d_steps_per_g = self.hparams['d_steps_per_g_step']
if (self.update_count.item() % (1+d_steps_per_g) < d_steps_per_g):
self.disc_opt.zero_grad()
disc_loss.backward()
self.disc_opt.step()
return {'disc_loss': disc_loss.item()}
else:
all_preds = self.classifier(all_z)
classifier_loss = F.cross_entropy(all_preds, all_y)
gen_loss = (classifier_loss +
(self.hparams['lambda'] * -disc_loss))
self.disc_opt.zero_grad()
self.gen_opt.zero_grad()
gen_loss.backward()
self.gen_opt.step()
return {'gen_loss': gen_loss.item()}
def predict(self, x):
return self.classifier(self.featurizer(x))
class DANN(AbstractDANN):
"""Unconditional DANN"""
def __init__(self, input_shape, num_classes, num_domains, hparams):
super(DANN, self).__init__(input_shape, num_classes, num_domains,
hparams, conditional=False, class_balance=False)
class CDANN(AbstractDANN):
"""Conditional DANN"""
def __init__(self, input_shape, num_classes, num_domains, hparams):
super(CDANN, self).__init__(input_shape, num_classes, num_domains,
hparams, conditional=True, class_balance=True)
class IRM(ERM):
"""Invariant Risk Minimization"""
def __init__(self, input_shape, num_classes, num_domains, hparams):
super(IRM, self).__init__(input_shape, num_classes, num_domains,
hparams)
self.register_buffer('update_count', torch.tensor([0]))
@staticmethod
def _irm_penalty(logits, y):
device = "cuda" if logits[0][0].is_cuda else "cpu"
scale = torch.tensor(1.).to(device).requires_grad_()
loss_1 = F.cross_entropy(logits[::2] * scale, y[::2])
loss_2 = F.cross_entropy(logits[1::2] * scale, y[1::2])
grad_1 = autograd.grad(loss_1, [scale], create_graph=True)[0]
grad_2 = autograd.grad(loss_2, [scale], create_graph=True)[0]
result = torch.sum(grad_1 * grad_2)
return result
def update(self, minibatches, unlabeled=None):
device = "cuda" if minibatches[0][0].is_cuda else "cpu"
penalty_weight = (self.hparams['irm_lambda'] if self.update_count
>= self.hparams['irm_penalty_anneal_iters'] else
1.0)
nll = 0.
penalty = 0.
all_x = torch.cat([x for x, y in minibatches])
all_logits = self.network(all_x)
all_logits_idx = 0
for i, (x, y) in enumerate(minibatches):
logits = all_logits[all_logits_idx:all_logits_idx + x.shape[0]]
all_logits_idx += x.shape[0]
nll += F.cross_entropy(logits, y)
penalty += self._irm_penalty(logits, y)
nll /= len(minibatches)
penalty /= len(minibatches)
loss = nll + (penalty_weight * penalty)
if self.update_count == self.hparams['irm_penalty_anneal_iters']:
# Reset Adam, because it doesn't like the sharp jump in gradient
# magnitudes that happens at this step.
self.optimizer = torch.optim.Adam(
self.network.parameters(),
lr=self.hparams["lr"],
weight_decay=self.hparams['weight_decay'])
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
self.update_count += 1
return {'loss': loss.item(), 'nll': nll.item(),
'penalty': penalty.item()}
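`_irm_penalty` above multiplies the gradients (with respect to a dummy scale) of the loss on two half-batches. With a squared loss the gradient at scale 1 has a closed form, giving a hedged, autograd-free analogue; this is a simplification for illustration, not the repo's cross-entropy penalty:

```python
import numpy as np

def irm_penalty_sq(z, y):
    # For L(s) = mean((s*z - y)^2), dL/ds at s = 1 is mean(2*z*(z - y));
    # the penalty is the product of the gradients on the two half-batches.
    def grad(zz, yy):
        return np.mean(2.0 * zz * (zz - yy))
    return grad(z[::2], y[::2]) * grad(z[1::2], y[1::2])
```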
class RDM(ERM):
"""RDM - Domain Generalization via Risk Distribution Matching (https://arxiv.org/abs/2310.18598) """
def __init__(self, input_shape, num_classes, num_domains, hparams):
super(RDM, self).__init__(input_shape, num_classes, num_domains, hparams)
self.register_buffer('update_count', torch.tensor([0]))
def my_cdist(self, x1, x2):
x1_norm = x1.pow(2).sum(dim=-1, keepdim=True)
x2_norm = x2.pow(2).sum(dim=-1, keepdim=True)
res = torch.addmm(x2_norm.transpose(-2, -1),
x1,
x2.transpose(-2, -1), alpha=-2).add_(x1_norm)
return res.clamp_min_(1e-30)
def gaussian_kernel(self, x, y, gamma=[0.0001, 0.001, 0.01, 0.1, 1, 10, 100,
1000]):
D = self.my_cdist(x, y)
K = torch.zeros_like(D)
for g in gamma:
K.add_(torch.exp(D.mul(-g)))
return K
def mmd(self, x, y):
Kxx = self.gaussian_kernel(x, x).mean()
Kyy = self.gaussian_kernel(y, y).mean()
Kxy = self.gaussian_kernel(x, y).mean()
return Kxx + Kyy - 2 * Kxy
@staticmethod
def _moment_penalty(p_mean, q_mean, p_var, q_var):
return (p_mean - q_mean) ** 2 + (p_var - q_var) ** 2
@staticmethod
def _kl_penalty(p_mean, q_mean, p_var, q_var):
        return 0.5 * torch.log(q_var / p_var) + (p_var + (p_mean - q_mean) ** 2) / (2 * q_var) - 0.5
def _js_penalty(self, p_mean, q_mean, p_var, q_var):
m_mean = (p_mean + q_mean) / 2
m_var = (p_var + q_var) / 4
return self._kl_penalty(p_mean, m_mean, p_var, m_var) + self._kl_penalty(q_mean, m_mean, q_var, m_var)
def update(self, minibatches, unlabeled=None, held_out_minibatches=None):
matching_penalty_weight = (self.hparams['rdm_lambda'] if self.update_count
>= self.hparams['rdm_penalty_anneal_iters'] else
0.)
variance_penalty_weight = (self.hparams['variance_weight'] if self.update_count
>= self.hparams['rdm_penalty_anneal_iters'] else
0.)
all_x = torch.cat([x for x, y in minibatches])
all_logits = self.predict(all_x)
losses = torch.zeros(len(minibatches)).cuda()
all_logits_idx = 0
all_confs_envs = None
for i, (x, y) in enumerate(minibatches):
logits = all_logits[all_logits_idx:all_logits_idx + x.shape[0]]
all_logits_idx += x.shape[0]
losses[i] = F.cross_entropy(logits, y)
nll = F.cross_entropy(logits, y, reduction = "none").unsqueeze(0)
if all_confs_envs is None:
all_confs_envs = nll
else:
all_confs_envs = torch.cat([all_confs_envs, nll], dim = 0)
erm_loss = losses.mean()
## squeeze the risks
all_confs_envs = torch.squeeze(all_confs_envs)
## find the worst domain
worst_env_idx = torch.argmax(torch.clone(losses))
all_confs_worst_env = all_confs_envs[worst_env_idx]
## flatten the risk
all_confs_worst_env_flat = torch.flatten(all_confs_worst_env)
all_confs_all_envs_flat = torch.flatten(all_confs_envs)
matching_penalty = self.mmd(all_confs_worst_env_flat.unsqueeze(1), all_confs_all_envs_flat.unsqueeze(1))
## variance penalty
variance_penalty = torch.var(all_confs_all_envs_flat)
variance_penalty += torch.var(all_confs_worst_env_flat)
total_loss = erm_loss + matching_penalty_weight * matching_penalty + variance_penalty_weight * variance_penalty
if self.update_count == self.hparams['rdm_penalty_anneal_iters']:
# Reset Adam, because it doesn't like the sharp jump in gradient
# magnitudes that happens at this step.
self.optimizer = torch.optim.Adam(
self.network.parameters(),
lr=self.hparams["rdm_lr"],
weight_decay=self.hparams['weight_decay'])
self.optimizer.zero_grad()
total_loss.backward()
self.optimizer.step()
self.update_count += 1
return {'update_count': self.update_count.item(), 'total_loss': total_loss.item(), 'erm_loss': erm_loss.item(), 'matching_penalty': matching_penalty.item(), 'variance_penalty': variance_penalty.item(), 'rdm_lambda' : self.hparams['rdm_lambda']}
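The multi-bandwidth Gaussian-kernel MMD used by `RDM.mmd` reduces to `Kxx + Kyy - 2*Kxy` over kernel means; a self-contained NumPy sketch with illustrative bandwidths:

```python
import numpy as np

def gaussian_mmd(x, y, gammas=(0.1, 1.0, 10.0)):
    # Sum of Gaussian kernels over several bandwidths, as in RDM.gaussian_kernel
    def kernel(a, b):
        d = ((a[:, None, :] - b[None, :, :]) ** 2).sum(-1)  # pairwise squared distances
        return sum(np.exp(-g * d) for g in gammas)
    return kernel(x, x).mean() + kernel(y, y).mean() - 2 * kernel(x, y).mean()
```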
class VREx(ERM):
"""V-REx algorithm from http://arxiv.org/abs/2003.00688"""
def __init__(self, input_shape, num_classes, num_domains, hparams):
super(VREx, self).__init__(input_shape, num_classes, num_domains,
hparams)
self.register_buffer('update_count', torch.tensor([0]))
def update(self, minibatches, unlabeled=None):
if self.update_count >= self.hparams["vrex_penalty_anneal_iters"]:
penalty_weight = self.hparams["vrex_lambda"]
else:
penalty_weight = 1.0
nll = 0.
all_x = torch.cat([x for x, y in minibatches])
all_logits = self.network(all_x)
all_logits_idx = 0
losses = torch.zeros(len(minibatches))
for i, (x, y) in enumerate(minibatches):
logits = all_logits[all_logits_idx:all_logits_idx + x.shape[0]]
all_logits_idx += x.shape[0]
nll = F.cross_entropy(logits, y)
losses[i] = nll
mean = losses.mean()
penalty = ((losses - mean) ** 2).mean()
loss = mean + penalty_weight * penalty
if self.update_count == self.hparams['vrex_penalty_anneal_iters']:
# Reset Adam (like IRM), because it doesn't like the sharp jump in
# gradient magnitudes that happens at this step.
self.optimizer = torch.optim.Adam(
self.network.parameters(),
lr=self.hparams["lr"],
weight_decay=self.hparams['weight_decay'])
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
self.update_count += 1
return {'loss': loss.item(), 'nll': nll.item(),
'penalty': penalty.item()}
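`VREx.update` penalizes the variance of per-environment risks around their mean; the objective itself is a one-liner, sketched here over plain loss values:

```python
import numpy as np

def vrex_objective(env_losses, penalty_weight):
    # Mean risk plus weighted variance of the per-environment risks
    losses = np.asarray(env_losses, dtype=float)
    mean = losses.mean()
    penalty = ((losses - mean) ** 2).mean()
    return mean + penalty_weight * penalty
```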
class Mixup(ERM):
"""
Mixup of minibatches from different domains
https://arxiv.org/pdf/2001.00677.pdf
https://arxiv.org/pdf/1912.01805.pdf
"""
def __init__(self, input_shape, num_classes, num_domains, hparams):
super(Mixup, self).__init__(input_shape, num_classes, num_domains,
hparams)
def update(self, minibatches, unlabeled=None):
objective = 0
for (xi, yi), (xj, yj) in random_pairs_of_minibatches(minibatches):
lam = np.random.beta(self.hparams["mixup_alpha"],
self.hparams["mixup_alpha"])
x = lam * xi + (1 - lam) * xj
predictions = self.predict(x)
objective += lam * F.cross_entropy(predictions, yi)
objective += (1 - lam) * F.cross_entropy(predictions, yj)
objective /= len(minibatches)
self.optimizer.zero_grad()
objective.backward()
self.optimizer.step()
return {'loss': objective.item()}
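`Mixup.update` draws `lam ~ Beta(alpha, alpha)` and mixes the inputs while splitting the loss between the two label sets; with one-hot labels the same idea mixes the labels directly. A hedged sketch (the helper name is illustrative):

```python
import numpy as np

def mixup_pair(xi, yi_onehot, xj, yj_onehot, alpha=0.2, rng=None):
    # Convex combination of two minibatches; the class above instead mixes
    # the two cross-entropy terms with weights lam and (1 - lam)
    rng = rng or np.random.default_rng(0)
    lam = rng.beta(alpha, alpha)
    return lam * xi + (1 - lam) * xj, lam * yi_onehot + (1 - lam) * yj_onehot
```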
class GroupDRO(ERM):
"""
Robust ERM minimizes the error at the worst minibatch
Algorithm 1 from [https://arxiv.org/pdf/1911.08731.pdf]
"""
def __init__(self, input_shape, num_classes, num_domains, hparams):
super(GroupDRO, self).__init__(input_shape, num_classes, num_domains,
hparams)
self.register_buffer("q", torch.Tensor())
def update(self, minibatches, unlabeled=None):
device = "cuda" if minibatches[0][0].is_cuda else "cpu"
if not len(self.q):
self.q = torch.ones(len(minibatches)).to(device)
losses = torch.zeros(len(minibatches)).to(device)
for m in range(len(minibatches)):
x, y = minibatches[m]
losses[m] = F.cross_entropy(self.predict(x), y)
self.q[m] *= (self.hparams["groupdro_eta"] * losses[m].data).exp()
self.q /= self.q.sum()
loss = torch.dot(losses, self.q)
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
return {'loss': loss.item()}
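The `q` update in `GroupDRO.update` is an exponentiated-gradient step that upweights the worst-performing domain; isolated as a pure function (illustrative only):

```python
import numpy as np

def groupdro_weights(q, losses, eta):
    # Exponentiated-gradient update of the per-domain weights, then renormalize
    q = q * np.exp(eta * np.asarray(losses, dtype=float))
    return q / q.sum()

q = groupdro_weights(np.ones(3), [0.1, 0.5, 2.0], eta=0.01)
```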
class MLDG(ERM):
"""
Model-Agnostic Meta-Learning
Algorithm 1 / Equation (3) from: https://arxiv.org/pdf/1710.03463.pdf
Related: https://arxiv.org/pdf/1703.03400.pdf
Related: https://arxiv.org/pdf/1910.13580.pdf
"""
def __init__(self, input_shape, num_classes, num_domains, hparams):
super(MLDG, self).__init__(input_shape, num_classes, num_domains,
hparams)
self.num_meta_test = hparams['n_meta_test']
def update(self, minibatches, unlabeled=None):
"""
Terms being computed:
* Li = Loss(xi, yi, params)
* Gi = Grad(Li, params)
* Lj = Loss(xj, yj, Optimizer(params, grad(Li, params)))
* Gj = Grad(Lj, params)
* params = Optimizer(params, Grad(Li + beta * Lj, params))
* = Optimizer(params, Gi + beta * Gj)
That is, when calling .step(), we want grads to be Gi + beta * Gj
For computational efficiency, we do not compute second derivatives.
"""
num_mb = len(minibatches)
objective = 0
self.optimizer.zero_grad()
for p in self.network.parameters():
if p.grad is None:
p.grad = torch.zeros_like(p)
for (xi, yi), (xj, yj) in split_meta_train_test(minibatches, self.num_meta_test):
# fine tune clone-network on task "i"
inner_net = copy.deepcopy(self.network)
inner_opt = torch.optim.Adam(
inner_net.parameters(),
lr=self.hparams["lr"],
weight_decay=self.hparams['weight_decay']
)
inner_obj = F.cross_entropy(inner_net(xi), yi)
inner_opt.zero_grad()
inner_obj.backward()
inner_opt.step()
# The network has now accumulated gradients Gi
# The clone-network has now parameters P - lr * Gi
for p_tgt, p_src in zip(self.network.parameters(),
inner_net.parameters()):
if p_src.grad is not None:
p_tgt.grad.data.add_(p_src.grad.data / num_mb)
# `objective` is populated for reporting purposes
objective += inner_obj.item()
# this computes Gj on the clone-network
loss_inner_j = F.cross_entropy(inner_net(xj), yj)
grad_inner_j = autograd.grad(loss_inner_j, inner_net.parameters(),
allow_unused=True)
# `objective` is populated for reporting purposes
objective += (self.hparams['mldg_beta'] * loss_inner_j).item()
for p, g_j in zip(self.network.parameters(), grad_inner_j):
if g_j is not None:
p.grad.data.add_(
self.hparams['mldg_beta'] * g_j.data / num_mb)
# The network has now accumulated gradients Gi + beta * Gj
# Repeat for all train-test splits, do .step()
objective /= len(minibatches)
self.optimizer.step()
return {'loss': objective}
# This commented "update" method back-propagates through the gradients of
# the inner update, as suggested in the original MAML paper. However, this
# is twice as expensive as the uncommented "update" method, which does not
# compute second-order derivatives, implementing the First-Order MAML
# method (FOMAML) described in the original MAML paper.
# def update(self, minibatches, unlabeled=None):
# objective = 0
# beta = self.hparams["beta"]
# inner_iterations = self.hparams["inner_iterations"]
# self.optimizer.zero_grad()
# with higher.innerloop_ctx(self.network, self.optimizer,
# copy_initial_weights=False) as (inner_network, inner_optimizer):
# for (xi, yi), (xj, yj) in random_pairs_of_minibatches(minibatches):
# for inner_iteration in range(inner_iterations):
# li = F.cross_entropy(inner_network(xi), yi)
# inner_optimizer.step(li)
#
# objective += F.cross_entropy(self.network(xi), yi)
# objective += beta * F.cross_entropy(inner_network(xj), yj)
# objective /= len(minibatches)
# objective.backward()
#
# self.optimizer.step()
#
# return objective
class AbstractMMD(ERM):
"""
Perform ERM while matching the pair-wise domain feature distributions
using MMD (abstract class)
"""
def __init__(self, input_shape, num_classes, num_domains, hparams, gaussian):
super(AbstractMMD, self).__init__(input_shape, num_classes, num_domains,
hparams)
if gaussian:
self.kernel_type = "gaussian"
else:
self.kernel_type = "mean_cov"
def my_cdist(self, x1, x2):
x1_norm = x1.pow(2).sum(dim=-1, keepdim=True)
x2_norm = x2.pow(2).sum(dim=-1, keepdim=True)
res = torch.addmm(x2_norm.transpose(-2, -1),
x1,
x2.transpose(-2, -1), alpha=-2).add_(x1_norm)
return res.clamp_min_(1e-30)
def gaussian_kernel(self, x, y, gamma=[0.001, 0.01, 0.1, 1, 10, 100,
1000]):
D = self.my_cdist(x, y)
K = torch.zeros_like(D)
for g in gamma:
K.add_(torch.exp(D.mul(-g)))
return K
def mmd(self, x, y):
if self.kernel_type == "gaussian":
Kxx = self.gaussian_kernel(x, x).mean()
Kyy = self.gaussian_kernel(y, y).mean()
Kxy = self.gaussian_kernel(x, y).mean()
return Kxx + Kyy - 2 * Kxy
else:
mean_x = x.mean(0, keepdim=True)
mean_y = y.mean(0, keepdim=True)
cent_x = x - mean_x
cent_y = y - mean_y
cova_x = (cent_x.t() @ cent_x) / (len(x) - 1)
cova_y = (cent_y.t() @ cent_y) / (len(y) - 1)
mean_diff = (mean_x - mean_y).pow(2).mean()
cova_diff = (cova_x - cova_y).pow(2).mean()
return mean_diff + cova_diff
def update(self, minibatches, unlabeled=None):
objective = 0
penalty = 0
nmb = len(minibatches)
features = [self.featurizer(xi) for xi, _ in minibatches]
classifs = [self.classifier(fi) for fi in features]
targets = [yi for _, yi in minibatches]
for i in range(nmb):
objective += F.cross_entropy(classifs[i], targets[i])
for j in range(i + 1, nmb):
penalty += self.mmd(features[i], features[j])
objective /= nmb
if nmb > 1:
penalty /= (nmb * (nmb - 1) / 2)
self.optimizer.zero_grad()
(objective + (self.hparams['mmd_gamma']*penalty)).backward()
self.optimizer.step()
if torch.is_tensor(penalty):
penalty = penalty.item()
return {'loss': objective.item(), 'penalty': penalty}
class MMD(AbstractMMD):
"""
MMD using Gaussian kernel
"""
def __init__(self, input_shape, num_classes, num_domains, hparams):
super(MMD, self).__init__(input_shape, num_classes,
num_domains, hparams, gaussian=True)
class CORAL(AbstractMMD):
"""
MMD using mean and covariance difference
"""
def __init__(self, input_shape, num_classes, num_domains, hparams):
super(CORAL, self).__init__(input_shape, num_classes,
num_domains, hparams, gaussian=False)
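The `mean_cov` branch of `AbstractMMD.mmd` (the CORAL variant above) matches the first and second moments of the two feature batches; a self-contained NumPy sketch:

```python
import numpy as np

def coral_distance(x, y):
    # Squared differences of means and covariances, as in the mean_cov branch
    mean_x, mean_y = x.mean(0, keepdims=True), y.mean(0, keepdims=True)
    cent_x, cent_y = x - mean_x, y - mean_y
    cova_x = cent_x.T @ cent_x / (len(x) - 1)
    cova_y = cent_y.T @ cent_y / (len(y) - 1)
    return ((mean_x - mean_y) ** 2).mean() + ((cova_x - cova_y) ** 2).mean()
```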
class MTL(Algorithm):
"""
A neural network version of
Domain Generalization by Marginal Transfer Learning
(https://arxiv.org/abs/1711.07910)
"""
def __init__(self, input_shape, num_classes, num_domains, hparams):
super(MTL, self).__init__(input_shape, num_classes, num_domains,
hparams)
self.featurizer = networks.Featurizer(input_shape, self.hparams)
self.classifier = networks.Classifier(
self.featurizer.n_outputs * 2,
num_classes,
self.hparams['nonlinear_classifier'])
self.optimizer = torch.optim.Adam(
list(self.featurizer.parameters()) +\
list(self.classifier.parameters()),
lr=self.hparams["lr"],
weight_decay=self.hparams['weight_decay']
)
self.register_buffer('embeddings',
torch.zeros(num_domains,
self.featurizer.n_outputs))
self.ema = self.hparams['mtl_ema']
def update(self, minibatches, unlabeled=None):
loss = 0
for env, (x, y) in enumerate(minibatches):
loss += F.cross_entropy(self.predict(x, env), y)
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
return {'loss': loss.item()}
def update_embeddings_(self, features, env=None):
return_embedding = features.mean(0)
if env is not None:
return_embedding = self.ema * return_embedding +\
(1 - self.ema) * self.embeddings[env]
self.embeddings[env] = return_embedding.clone().detach()
return return_embedding.view(1, -1).repeat(len(features), 1)
def predict(self, x, env=None):
features = self.featurizer(x)
embedding = self.update_embeddings_(features, env).normal_()
return self.classifier(torch.cat((features, embedding), 1))
class SagNet(Algorithm):
"""
Style Agnostic Network
Algorithm 1 from: https://arxiv.org/abs/1910.11645
"""
def __init__(self, input_shape, num_classes, num_domains, hparams):
super(SagNet, self).__init__(input_shape, num_classes, num_domains,
hparams)
# featurizer network
self.network_f = networks.Featurizer(input_shape, self.hparams)
# content network
self.network_c = networks.Classifier(
self.network_f.n_outputs,
num_classes,
self.hparams['nonlinear_classifier'])
# style network
self.network_s = networks.Classifier(
self.network_f.n_outputs,
num_classes,
self.hparams['nonlinear_classifier'])
# # This commented block of code implements something closer to the
# # original paper, but is specific to ResNet and puts the other
# # algorithms at a disadvantage.
# resnet_c = networks.Featurizer(input_shape, self.hparams)
# resnet_s = networks.Featurizer(input_shape, self.hparams)
# # featurizer network
# self.network_f = torch.nn.Sequential(
# resnet_c.network.conv1,
# resnet_c.network.bn1,
# resnet_c.network.relu,
# resnet_c.network.maxpool,
# resnet_c.network.layer1,
# resnet_c.network.layer2,
# resnet_c.network.layer3)
# # content network
# self.network_c = torch.nn.Sequential(
# resnet_c.network.layer4,
# resnet_c.network.avgpool,
# networks.Flatten(),
# resnet_c.network.fc)
# # style network
# self.network_s = torch.nn.Sequential(
# resnet_s.network.layer4,
# resnet_s.network.avgpool,
# networks.Flatten(),
# resnet_s.network.fc)
def opt(p):
return torch.optim.Adam(p, lr=hparams["lr"],
weight_decay=hparams["weight_decay"])
self.optimizer_f = opt(self.network_f.parameters())
self.optimizer_c = opt(self.network_c.parameters())
self.optimizer_s = opt(self.network_s.parameters())
self.weight_adv = hparams["sag_w_adv"]
def forward_c(self, x):
# learning content network on randomized style
return self.network_c(self.randomize(self.network_f(x), "style"))
def forward_s(self, x):
# learning style network on randomized content
return self.network_s(self.randomize(self.network_f(x), "content"))
def randomize(self, x, what="style", eps=1e-5):
device = "cuda" if x.is_cuda else "cpu"
sizes = x.size()
alpha = torch.rand(sizes[0], 1).to(device)
if len(sizes) == 4:
x = x.view(sizes[0], sizes[1], -1)
alpha = alpha.unsqueeze(-1)
mean = x.mean(-1, keepdim=True)
var = x.var(-1, keepdim=True)
x = (x - mean) / (var + eps).sqrt()
idx_swap = torch.randperm(sizes[0])
if what == "style":
mean = alpha * mean + (1 - alpha) * mean[idx_swap]
var = alpha * var + (1 - alpha) * var[idx_swap]
else:
x = x[idx_swap].detach()
x = x * (var + eps).sqrt() + mean
return x.view(*sizes)
def update(self, minibatches, unlabeled=None):
all_x = torch.cat([x for x, y in minibatches])
all_y = torch.cat([y for x, y in minibatches])
# learn content
self.optimizer_f.zero_grad()
self.optimizer_c.zero_grad()
loss_c = F.cross_entropy(self.forward_c(all_x), all_y)
loss_c.backward()
self.optimizer_f.step()
self.optimizer_c.step()
# learn style
self.optimizer_s.zero_grad()
loss_s = F.cross_entropy(self.forward_s(all_x), all_y)
loss_s.backward()
self.optimizer_s.step()
# learn adversary
self.optimizer_f.zero_grad()
loss_adv = -F.log_softmax(self.forward_s(all_x), dim=1).mean(1).mean()
loss_adv = loss_adv * self.weight_adv
loss_adv.backward()
self.optimizer_f.step()
return {'loss_c': loss_c.item(), 'loss_s': loss_s.item(),
'loss_adv': loss_adv.item()}
def predict(self, x):
return self.network_c(self.network_f(x))
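The style-randomization trick in `randomize` can be isolated in a small sketch. `mix_style` below is a hypothetical simplification (scalar `alpha`, 2-D input only) of the same idea: normalize away per-sample statistics, then reinstate statistics interpolated with those of a permuted batch:

```python
# Toy illustration (not the class above) of SagNet-style mixing: per-sample
# mean/variance ("style") is interpolated with a permuted batch while the
# normalized activations ("content") are kept.
import torch

def mix_style(x, alpha, perm, eps=1e-5):
    # x: (batch, features); alpha in [0, 1]; perm: permutation of batch indices
    mean = x.mean(-1, keepdim=True)
    var = x.var(-1, keepdim=True)
    content = (x - mean) / (var + eps).sqrt()        # style-free content
    mixed_mean = alpha * mean + (1 - alpha) * mean[perm]
    mixed_var = alpha * var + (1 - alpha) * var[perm]
    return content * (mixed_var + eps).sqrt() + mixed_mean
```

With `alpha = 1` each sample keeps its own statistics and the input is recovered (up to floating-point error); with `alpha = 0` each sample adopts the style of its permuted partner.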
class RSC(ERM):
def __init__(self, input_shape, num_classes, num_domains, hparams):
super(RSC, self).__init__(input_shape, num_classes, num_domains,
hparams)
self.drop_f = (1 - hparams['rsc_f_drop_factor']) * 100
self.drop_b = (1 - hparams['rsc_b_drop_factor']) * 100
self.num_classes = num_classes
def update(self, minibatches, unlabeled=None):
device = "cuda" if minibatches[0][0].is_cuda else "cpu"
# inputs
all_x = torch.cat([x for x, y in minibatches])
# labels
all_y = torch.cat([y for _, y in minibatches])
# one-hot labels
all_o = torch.nn.functional.one_hot(all_y, self.num_classes)
# features
all_f = self.featurizer(all_x)
# predictions
all_p = self.classifier(all_f)
# Equation (1): compute gradients with respect to representation
all_g = autograd.grad((all_p * all_o).sum(), all_f)[0]
# Equation (2): compute top-gradient-percentile mask
percentiles = np.percentile(all_g.cpu(), self.drop_f, axis=1)
percentiles = torch.Tensor(percentiles)
percentiles = percentiles.unsqueeze(1).repeat(1, all_g.size(1))
mask_f = all_g.lt(percentiles.to(device)).float()
# Equation (3): mute top-gradient-percentile activations
all_f_muted = all_f * mask_f
# Equation (4): compute muted predictions
all_p_muted = self.classifier(all_f_muted)
# Section 3.3: Batch Percentage
all_s = F.softmax(all_p, dim=1)
all_s_muted = F.softmax(all_p_muted, dim=1)
changes = (all_s * all_o).sum(1) - (all_s_muted * all_o).sum(1)
percentile = np.percentile(changes.detach().cpu(), self.drop_b)
mask_b = changes.lt(percentile).float().view(-1, 1)
mask = torch.logical_or(mask_f, mask_b).float()
# Equations (3) and (4) again, this time muting over examples
all_p_muted_again = self.classifier(all_f * mask)
# Equation (5): update
loss = F.cross_entropy(all_p_muted_again, all_y)
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
return {'loss': loss.item()}
class SD(ERM):
"""
Gradient Starvation: A Learning Proclivity in Neural Networks
Equation 25 from [https://arxiv.org/pdf/2011.09468.pdf]
"""
def __init__(self, input_shape, num_classes, num_domains, hparams):
super(SD, self).__init__(input_shape, num_classes, num_domains,
hparams)
self.sd_reg = hparams["sd_reg"]
def update(self, minibatches, unlabeled=None):
all_x = torch.cat([x for x, y in minibatches])
all_y = torch.cat([y for x, y in minibatches])
all_p = self.predict(all_x)
loss = F.cross_entropy(all_p, all_y)
penalty = (all_p ** 2).mean()
objective = loss + self.sd_reg * penalty
self.optimizer.zero_grad()
objective.backward()
self.optimizer.step()
return {'loss': loss.item(), 'penalty': penalty.item()}
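A toy walk-through of the SD objective above, with made-up logits and an assumed value for `hparams["sd_reg"]`:

```python
# Illustrative computation of the spectral-decoupling objective: plain
# cross-entropy plus an L2 penalty on the logits themselves.
import torch
import torch.nn.functional as F

logits = torch.tensor([[2.0, -1.0], [0.5, 1.5]])
y = torch.tensor([0, 1])
sd_reg = 0.1                      # assumed hparams["sd_reg"]
loss = F.cross_entropy(logits, y)
penalty = (logits ** 2).mean()    # = (4 + 1 + 0.25 + 2.25) / 4 = 1.875
objective = loss + sd_reg * penalty
```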
class ANDMask(ERM):
"""
Learning Explanations that are Hard to Vary [https://arxiv.org/abs/2009.00329]
AND-Mask implementation from [https://github.com/gibipara92/learning-explanations-hard-to-vary]
"""
def __init__(self, input_shape, num_classes, num_domains, hparams):
super(ANDMask, self).__init__(input_shape, num_classes, num_domains, hparams)
self.tau = hparams["tau"]
def update(self, minibatches, unlabeled=None):
mean_loss = 0
param_gradients = [[] for _ in self.network.parameters()]
for i, (x, y) in enumerate(minibatches):
logits = self.network(x)
env_loss = F.cross_entropy(logits, y)
mean_loss += env_loss.item() / len(minibatches)
env_grads = autograd.grad(env_loss, self.network.parameters())
for grads, env_grad in zip(param_gradients, env_grads):
grads.append(env_grad)
self.optimizer.zero_grad()
self.mask_grads(self.tau, param_gradients, self.network.parameters())
self.optimizer.step()
return {'loss': mean_loss}
def mask_grads(self, tau, gradients, params):
for param, grads in zip(params, gradients):
grads = torch.stack(grads, dim=0)
grad_signs = torch.sign(grads)
mask = torch.mean(grad_signs, dim=0).abs() >= self.tau
mask = mask.to(torch.float32)
avg_grad = torch.mean(grads, dim=0)
mask_t = (mask.sum() / mask.numel())
param.grad = mask * avg_grad
param.grad *= (1. / (1e-10 + mask_t))
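A toy run of the sign-agreement rule in `mask_grads` (illustrative numbers): with `tau = 1.0`, a gradient component survives only when every environment agrees on its sign:

```python
# AND-mask on made-up per-environment gradients for three parameters:
# keep a component only if |mean of gradient signs| >= tau.
import torch

env_grads = torch.tensor([[ 1.0, -2.0,  3.0],
                          [ 2.0,  1.0,  4.0],
                          [ 3.0, -1.0,  5.0]])
tau = 1.0  # unanimous sign agreement required
grad_signs = torch.sign(env_grads)
mask = (grad_signs.mean(dim=0).abs() >= tau).float()
masked_grad = mask * env_grads.mean(dim=0)
```

The middle component has mixed signs across environments, so it is zeroed out; the other two keep their averaged gradient.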
class IGA(ERM):
"""
Inter-environmental Gradient Alignment
From https://arxiv.org/abs/2008.01883v2
"""
def __init__(self, in_features, num_classes, num_domains, hparams):
super(IGA, self).__init__(in_features, num_classes, num_domains, hparams)
def update(self, minibatches, unlabeled=None):
total_loss = 0
grads = []
for i, (x, y) in enumerate(minibatches):
logits = self.network(x)
env_loss = F.cross_entropy(logits, y)
total_loss += env_loss
env_grad = autograd.grad(env_loss, self.network.parameters(),
create_graph=True)
grads.append(env_grad)
mean_loss = total_loss / len(minibatches)
mean_grad = autograd.grad(mean_loss, self.network.parameters(),
retain_graph=True)
# compute trace penalty
penalty_value = 0
for grad in grads:
for g, mean_g in zip(grad, mean_grad):
penalty_value += (g - mean_g).pow(2).sum()
objective = mean_loss + self.hparams['penalty'] * penalty_value
self.optimizer.zero_grad()
objective.backward()
self.optimizer.step()
return {'loss': mean_loss.item(), 'penalty': penalty_value.item()}
class SelfReg(ERM):
def __init__(self, input_shape, num_classes, num_domains, hparams):
super(SelfReg, self).__init__(input_shape, num_classes, num_domains,
hparams)
self.num_classes = num_classes
self.MSEloss = nn.MSELoss()
input_feat_size = self.featurizer.n_outputs
hidden_size = input_feat_size if input_feat_size==2048 else input_feat_size*2
self.cdpl = nn.Sequential(
nn.Linear(input_feat_size, hidden_size),
nn.BatchNorm1d(hidden_size),
nn.ReLU(inplace=True),
nn.Linear(hidden_size, hidden_size),
nn.BatchNorm1d(hidden_size),
nn.ReLU(inplace=True),
nn.Linear(hidden_size, input_feat_size),
nn.BatchNorm1d(input_feat_size)
)
def update(self, minibatches, unlabeled=None):
all_x = torch.cat([x for x, y in minibatches])
all_y = torch.cat([y for _, y in minibatches])
lam = np.random.beta(0.5, 0.5)
batch_size = all_y.size()[0]
# cluster and order features into same-class group
with torch.no_grad():
sorted_y, indices = torch.sort(all_y)
sorted_x = torch.zeros_like(all_x)
for idx, order in enumerate(indices):
sorted_x[idx] = all_x[order]
intervals = []
ex = 0
for idx, val in enumerate(sorted_y):
if ex==val:
continue
intervals.append(idx)
ex = val
intervals.append(batch_size)
all_x = sorted_x
all_y = sorted_y
feat = self.featurizer(all_x)
proj = self.cdpl(feat)
output = self.classifier(feat)
# shuffle
output_2 = torch.zeros_like(output)
feat_2 = torch.zeros_like(proj)
output_3 = torch.zeros_like(output)
feat_3 = torch.zeros_like(proj)
ex = 0
for end in intervals:
shuffle_indices = torch.randperm(end-ex)+ex
shuffle_indices2 = torch.randperm(end-ex)+ex
for idx in range(end-ex):
output_2[idx+ex] = output[shuffle_indices[idx]]
feat_2[idx+ex] = proj[shuffle_indices[idx]]
output_3[idx+ex] = output[shuffle_indices2[idx]]
feat_3[idx+ex] = proj[shuffle_indices2[idx]]
ex = end
# mixup
output_3 = lam*output_2 + (1-lam)*output_3
feat_3 = lam*feat_2 + (1-lam)*feat_3
# regularization
L_ind_logit = self.MSEloss(output, output_2)
L_hdl_logit = self.MSEloss(output, output_3)
L_ind_feat = 0.3 * self.MSEloss(feat, feat_2)
L_hdl_feat = 0.3 * self.MSEloss(feat, feat_3)
cl_loss = F.cross_entropy(output, all_y)
C_scale = min(cl_loss.item(), 1.)
loss = cl_loss + C_scale*(lam*(L_ind_logit + L_ind_feat)+(1-lam)*(L_hdl_logit + L_hdl_feat))
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
return {'loss': loss.item()}
class SANDMask(ERM):
"""
SAND-mask: An Enhanced Gradient Masking Strategy for the Discovery of Invariances in Domain Generalization
"""
def __init__(self, input_shape, num_classes, num_domains, hparams):
super(SANDMask, self).__init__(input_shape, num_classes, num_domains, hparams)
self.tau = hparams["tau"]
self.k = hparams["k"]
betas = (0.9, 0.999)
self.optimizer = torch.optim.Adam(
self.network.parameters(),
lr=self.hparams["lr"],
weight_decay=self.hparams['weight_decay'],
betas=betas
)
self.register_buffer('update_count', torch.tensor([0]))
def update(self, minibatches, unlabeled=None):
mean_loss = 0
param_gradients = [[] for _ in self.network.parameters()]
for i, (x, y) in enumerate(minibatches):
logits = self.network(x)
env_loss = F.cross_entropy(logits, y)
mean_loss += env_loss.item() / len(minibatches)
env_grads = autograd.grad(env_loss, self.network.parameters(), retain_graph=True)
for grads, env_grad in zip(param_gradients, env_grads):
grads.append(env_grad)
self.optimizer.zero_grad()
# gradient masking applied here
self.mask_grads(param_gradients, self.network.parameters())
self.optimizer.step()
self.update_count += 1
return {'loss': mean_loss}
def mask_grads(self, gradients, params):
'''
Here a mask with continuous values in the range [0,1] is formed to control the amount of update for each
parameter based on the agreement of gradients coming from different environments.
'''
device = gradients[0][0].device
for param, grads in zip(params, gradients):
grads = torch.stack(grads, dim=0)
avg_grad = torch.mean(grads, dim=0)
grad_signs = torch.sign(grads)
gamma = torch.tensor(1.0).to(device)
grads_var = grads.var(dim=0)
grads_var[torch.isnan(grads_var)] = 1e-17
lam = (gamma * grads_var).pow(-1)
mask = torch.tanh(self.k * lam * (torch.abs(grad_signs.mean(dim=0)) - self.tau))
mask = torch.max(mask, torch.zeros_like(mask))
mask[torch.isnan(mask)] = 1e-17
mask_t = (mask.sum() / mask.numel())
param.grad = mask * avg_grad
param.grad *= (1. / (1e-10 + mask_t))
class Fishr(Algorithm):
"Invariant Gradients variances for Out-of-distribution Generalization"
def __init__(self, input_shape, num_classes, num_domains, hparams):
assert backpack is not None, "Install backpack with: 'pip install backpack-for-pytorch==1.3.0'"
super(Fishr, self).__init__(input_shape, num_classes, num_domains, hparams)
self.num_domains = num_domains
self.featurizer = networks.Featurizer(input_shape, self.hparams)
self.classifier = extend(
networks.Classifier(
self.featurizer.n_outputs,
num_classes,
self.hparams['nonlinear_classifier'],
)
)
self.network = nn.Sequential(self.featurizer, self.classifier)
self.register_buffer("update_count", torch.tensor([0]))
self.bce_extended = extend(nn.CrossEntropyLoss(reduction='none'))
self.ema_per_domain = [
MovingAverage(ema=self.hparams["ema"], oneminusema_correction=True)
for _ in range(self.num_domains)
]
self._init_optimizer()
def _init_optimizer(self):
self.optimizer = torch.optim.Adam(
list(self.featurizer.parameters()) + list(self.classifier.parameters()),
lr=self.hparams["lr"],
weight_decay=self.hparams["weight_decay"],
)
def update(self, minibatches, unlabeled=None):
assert len(minibatches) == self.num_domains
all_x = torch.cat([x for x, y in minibatches])
all_y = torch.cat([y for x, y in minibatches])
len_minibatches = [x.shape[0] for x, y in minibatches]
all_z = self.featurizer(all_x)
all_logits = self.classifier(all_z)
penalty = self.compute_fishr_penalty(all_logits, all_y, len_minibatches)
all_nll = F.cross_entropy(all_logits, all_y)
penalty_weight = 0
if self.update_count >= self.hparams["penalty_anneal_iters"]:
penalty_weight = self.hparams["lambda"]
if self.update_count == self.hparams["penalty_anneal_iters"] != 0:
# Reset Adam as in IRM or V-REx, because it may not like the sharp jump in
# gradient magnitudes that happens at this step.
self._init_optimizer()
self.update_count += 1
objective = all_nll + penalty_weight * penalty
self.optimizer.zero_grad()
objective.backward()
self.optimizer.step()
return {'loss': objective.item(), 'nll': all_nll.item(), 'penalty': penalty.item()}
def compute_fishr_penalty(self, all_logits, all_y, len_minibatches):
dict_grads = self._get_grads(all_logits, all_y)
grads_var_per_domain = self._get_grads_var_per_domain(dict_grads, len_minibatches)
return self._compute_distance_grads_var(grads_var_per_domain)
def _get_grads(self, logits, y):
self.optimizer.zero_grad()
loss = self.bce_extended(logits, y).sum()
with backpack(BatchGrad()):
loss.backward(
inputs=list(self.classifier.parameters()), retain_graph=True, create_graph=True
)
# compute individual grads for all samples across all domains simultaneously
dict_grads = OrderedDict(
[
(name, weights.grad_batch.clone().view(weights.grad_batch.size(0), -1))
for name, weights in self.classifier.named_parameters()
]
)
return dict_grads
def _get_grads_var_per_domain(self, dict_grads, len_minibatches):
# grads var per domain
grads_var_per_domain = [{} for _ in range(self.num_domains)]
for name, _grads in dict_grads.items():
all_idx = 0
for domain_id, bsize in enumerate(len_minibatches):
env_grads = _grads[all_idx:all_idx + bsize]
all_idx += bsize
env_mean = env_grads.mean(dim=0, keepdim=True)
env_grads_centered = env_grads - env_mean
grads_var_per_domain[domain_id][name] = (env_grads_centered).pow(2).mean(dim=0)
# moving average
for domain_id in range(self.num_domains):
grads_var_per_domain[domain_id] = self.ema_per_domain[domain_id].update(
grads_var_per_domain[domain_id]
)
return grads_var_per_domain
def _compute_distance_grads_var(self, grads_var_per_domain):
# compute gradient variances averaged across domains
grads_var = OrderedDict(
[
(
name,
torch.stack(
[
grads_var_per_domain[domain_id][name]
for domain_id in range(self.num_domains)
],
dim=0
).mean(dim=0)
)
for name in grads_var_per_domain[0].keys()
]
)
penalty = 0
for domain_id in range(self.num_domains):
penalty += l2_between_dicts(grads_var_per_domain[domain_id], grads_var)
return penalty / self.num_domains
def predict(self, x):
return self.network(x)
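The core of the Fishr penalty can be previewed with made-up per-domain gradient variances: each domain's variance vector is pulled toward the cross-domain mean. This sketch omits the per-parameter dictionaries and moving averages used above:

```python
# Toy Fishr-style penalty on two illustrative gradient-variance vectors:
# average squared distance of each domain's variances from their mean.
import torch

var_a = torch.tensor([1.0, 4.0])   # gradient variance in domain A
var_b = torch.tensor([3.0, 2.0])   # gradient variance in domain B
var_mean = (var_a + var_b) / 2
penalty = ((var_a - var_mean).pow(2).sum()
           + (var_b - var_mean).pow(2).sum()) / 2
```

The penalty vanishes exactly when both domains share the same gradient variances.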
class TRM(Algorithm):
"""
Learning Representations that Support Robust Transfer of Predictors
"""
def __init__(self, input_shape, num_classes, num_domains, hparams):
super(TRM, self).__init__(input_shape, num_classes, num_domains,hparams)
self.register_buffer('update_count', torch.tensor([0]))
self.num_domains = num_domains
self.featurizer = networks.Featurizer(input_shape, self.hparams)
self.classifier = nn.Linear(self.featurizer.n_outputs, num_classes).cuda()
self.clist = [nn.Linear(self.featurizer.n_outputs, num_classes).cuda() for i in range(num_domains+1)]
self.olist = [torch.optim.SGD(
self.clist[i].parameters(),
lr=1e-1,
) for i in range(num_domains+1)]
self.optimizer_f = torch.optim.Adam(
self.featurizer.parameters(),
lr=self.hparams["lr"],
weight_decay=self.hparams['weight_decay']
)
self.optimizer_c = torch.optim.Adam(
self.classifier.parameters(),
lr=self.hparams["lr"],
weight_decay=self.hparams['weight_decay']
)
# initial weights
self.alpha = torch.ones((num_domains, num_domains)).cuda() - torch.eye(num_domains).cuda()
@staticmethod
def neum(v, model, batch):
def hvp(y, w, v):
# First backprop
first_grads = autograd.grad(y, w, retain_graph=True, create_graph=True, allow_unused=True)
first_grads = torch.nn.utils.parameters_to_vector(first_grads)
# Elementwise products
elemwise_products = first_grads @ v
# Second backprop
return_grads = autograd.grad(elemwise_products, w, create_graph=True)
return_grads = torch.nn.utils.parameters_to_vector(return_grads)
return return_grads
v = v.detach()
h_estimate = v
cnt = 0.
model.eval()
n_iters = 10
for i in range(n_iters):
model.weight.grad *= 0
y = model(batch[0].detach())
loss = F.cross_entropy(y, batch[1].detach())
hv = hvp(loss, model.weight, v)
v -= hv
v = v.detach()
h_estimate = v + h_estimate
h_estimate = h_estimate.detach()
# diverging; stop early
if torch.max(abs(h_estimate)) > 10:
break
cnt += 1
model.train()
return h_estimate.detach()
def update(self, minibatches, unlabeled=None):
loss_swap = 0.0
trm = 0.0
if self.update_count >= self.hparams['iters']:
# TRM
if self.hparams['class_balanced']:
# for stability when facing unbalanced labels across environments
for classifier in self.clist:
classifier.weight.data = copy.deepcopy(self.classifier.weight.data)
self.alpha /= self.alpha.sum(1, keepdim=True)
self.featurizer.train()
all_x = torch.cat([x for x, y in minibatches])
all_y = torch.cat([y for x, y in minibatches])
all_feature = self.featurizer(all_x)
# updating original network
loss = F.cross_entropy(self.classifier(all_feature), all_y)
for i in range(30):
all_logits_idx = 0
loss_erm = 0.
for j, (x, y) in enumerate(minibatches):
# j-th domain
feature = all_feature[all_logits_idx:all_logits_idx + x.shape[0]]
all_logits_idx += x.shape[0]
loss_erm += F.cross_entropy(self.clist[j](feature.detach()), y)
for opt in self.olist:
opt.zero_grad()
loss_erm.backward()
for opt in self.olist:
opt.step()
# collect (feature, y)
feature_split = list()
y_split = list()
all_logits_idx = 0
for i, (x, y) in enumerate(minibatches):
feature = all_feature[all_logits_idx:all_logits_idx + x.shape[0]]
all_logits_idx += x.shape[0]
feature_split.append(feature)
y_split.append(y)
# estimate transfer risk
for Q, (x, y) in enumerate(minibatches):
sample_list = list(range(len(minibatches)))
sample_list.remove(Q)
loss_Q = F.cross_entropy(self.clist[Q](feature_split[Q]), y_split[Q])
grad_Q = autograd.grad(loss_Q, self.clist[Q].weight, create_graph=True)
vec_grad_Q = nn.utils.parameters_to_vector(grad_Q)
loss_P = [F.cross_entropy(self.clist[Q](feature_split[i]), y_split[i])*(self.alpha[Q, i].data.detach())
if i in sample_list else 0. for i in range(len(minibatches))]
loss_P_sum = sum(loss_P)
grad_P = autograd.grad(loss_P_sum, self.clist[Q].weight, create_graph=True)
vec_grad_P = nn.utils.parameters_to_vector(grad_P).detach()
vec_grad_P = self.neum(vec_grad_P, self.clist[Q], (feature_split[Q], y_split[Q]))
loss_swap += loss_P_sum - self.hparams['cos_lambda'] * (vec_grad_P.detach() @ vec_grad_Q)
for i in sample_list:
self.alpha[Q, i] *= (self.hparams["groupdro_eta"] * loss_P[i].data).exp()
loss_swap /= len(minibatches)
trm /= len(minibatches)
else:
# ERM
self.featurizer.train()
all_x = torch.cat([x for x, y in minibatches])
all_y = torch.cat([y for x, y in minibatches])
all_feature = self.featurizer(all_x)
loss = F.cross_entropy(self.classifier(all_feature), all_y)
nll = loss.item()
self.optimizer_c.zero_grad()
self.optimizer_f.zero_grad()
if self.update_count >= self.hparams['iters']:
loss_swap = (loss + loss_swap)
else:
loss_swap = loss
loss_swap.backward()
self.optimizer_f.step()
self.optimizer_c.step()
loss_swap = loss_swap.item() - nll
self.update_count += 1
return {'nll': nll, 'trm_loss': loss_swap}
def predict(self, x):
return self.classifier(self.featurizer(x))
def train(self):
self.featurizer.train()
def eval(self):
self.featurizer.eval()
class IB_ERM(ERM):
"""Information Bottleneck based ERM on feature with conditionning"""
def __init__(self, input_shape, num_classes, num_domains, hparams):
super(IB_ERM, self).__init__(input_shape, num_classes, num_domains,
hparams)
self.optimizer = torch.optim.Adam(
list(self.featurizer.parameters()) + list(self.classifier.parameters()),
lr=self.hparams["lr"],
weight_decay=self.hparams['weight_decay']
)
self.register_buffer('update_count', torch.tensor([0]))
def update(self, minibatches, unlabeled=None):
device = "cuda" if minibatches[0][0].is_cuda else "cpu"
ib_penalty_weight = (self.hparams['ib_lambda'] if self.update_count
>= self.hparams['ib_penalty_anneal_iters'] else
0.0)
nll = 0.
ib_penalty = 0.
all_x = torch.cat([x for x, y in minibatches])
all_features = self.featurizer(all_x)
all_logits = self.classifier(all_features)
all_logits_idx = 0
for i, (x, y) in enumerate(minibatches):
features = all_features[all_logits_idx:all_logits_idx + x.shape[0]]
logits = all_logits[all_logits_idx:all_logits_idx + x.shape[0]]
all_logits_idx += x.shape[0]
nll += F.cross_entropy(logits, y)
ib_penalty += features.var(dim=0).mean()
nll /= len(minibatches)
ib_penalty /= len(minibatches)
# Compile loss
loss = nll
loss += ib_penalty_weight * ib_penalty
if self.update_count == self.hparams['ib_penalty_anneal_iters']:
# Reset Adam, because it doesn't like the sharp jump in gradient
# magnitudes that happens at this step.
self.optimizer = torch.optim.Adam(
list(self.featurizer.parameters()) + list(self.classifier.parameters()),
lr=self.hparams["lr"],
weight_decay=self.hparams['weight_decay'])
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
self.update_count += 1
return {'loss': loss.item(),
'nll': nll.item(),
'IB_penalty': ib_penalty.item()}
class IB_IRM(ERM):
"""Information Bottleneck based IRM on feature with conditionning"""
def __init__(self, input_shape, num_classes, num_domains, hparams):
super(IB_IRM, self).__init__(input_shape, num_classes, num_domains,
hparams)
self.optimizer = torch.optim.Adam(
list(self.featurizer.parameters()) + list(self.classifier.parameters()),
lr=self.hparams["lr"],
weight_decay=self.hparams['weight_decay']
)
self.register_buffer('update_count', torch.tensor([0]))
@staticmethod
def _irm_penalty(logits, y):
device = "cuda" if logits[0][0].is_cuda else "cpu"
scale = torch.tensor(1.).to(device).requires_grad_()
loss_1 = F.cross_entropy(logits[::2] * scale, y[::2])
loss_2 = F.cross_entropy(logits[1::2] * scale, y[1::2])
grad_1 = autograd.grad(loss_1, [scale], create_graph=True)[0]
grad_2 = autograd.grad(loss_2, [scale], create_graph=True)[0]
result = torch.sum(grad_1 * grad_2)
return result
def update(self, minibatches, unlabeled=None):
device = "cuda" if minibatches[0][0].is_cuda else "cpu"
irm_penalty_weight = (self.hparams['irm_lambda'] if self.update_count
>= self.hparams['irm_penalty_anneal_iters'] else
1.0)
ib_penalty_weight = (self.hparams['ib_lambda'] if self.update_count
>= self.hparams['ib_penalty_anneal_iters'] else
0.0)
nll = 0.
irm_penalty = 0.
ib_penalty = 0.
all_x = torch.cat([x for x, y in minibatches])
all_features = self.featurizer(all_x)
all_logits = self.classifier(all_features)
all_logits_idx = 0
for i, (x, y) in enumerate(minibatches):
features = all_features[all_logits_idx:all_logits_idx + x.shape[0]]
logits = all_logits[all_logits_idx:all_logits_idx + x.shape[0]]
all_logits_idx += x.shape[0]
nll += F.cross_entropy(logits, y)
irm_penalty += self._irm_penalty(logits, y)
ib_penalty += features.var(dim=0).mean()
nll /= len(minibatches)
irm_penalty /= len(minibatches)
ib_penalty /= len(minibatches)
# Compile loss
loss = nll
loss += irm_penalty_weight * irm_penalty
loss += ib_penalty_weight * ib_penalty
if self.update_count == self.hparams['irm_penalty_anneal_iters'] or self.update_count == self.hparams['ib_penalty_anneal_iters']:
# Reset Adam, because it doesn't like the sharp jump in gradient
# magnitudes that happens at this step.
self.optimizer = torch.optim.Adam(
list(self.featurizer.parameters()) + list(self.classifier.parameters()),
lr=self.hparams["lr"],
weight_decay=self.hparams['weight_decay'])
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
self.update_count += 1
return {'loss': loss.item(),
'nll': nll.item(),
'IRM_penalty': irm_penalty.item(),
'IB_penalty': ib_penalty.item()}
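The `_irm_penalty` computation above can be exercised in isolation on random toy logits: the penalty is the product of the gradients of the two half-batch losses with respect to a dummy scale, which vanishes when the classifier is simultaneously optimal on both halves:

```python
# Standalone run of the IRMv1-style penalty on toy data (values are random,
# for illustration only).
import torch
import torch.nn.functional as F
from torch import autograd

torch.manual_seed(0)
logits = torch.randn(8, 3, requires_grad=True)
y = torch.randint(0, 3, (8,))
scale = torch.tensor(1.).requires_grad_()
# split the batch into even/odd halves, as in _irm_penalty
loss_1 = F.cross_entropy(logits[::2] * scale, y[::2])
loss_2 = F.cross_entropy(logits[1::2] * scale, y[1::2])
grad_1 = autograd.grad(loss_1, [scale], create_graph=True)[0]
grad_2 = autograd.grad(loss_2, [scale], create_graph=True)[0]
penalty = torch.sum(grad_1 * grad_2)
```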
class AbstractCAD(Algorithm):
"""Contrastive adversarial domain bottleneck (abstract class)
from Optimal Representations for Covariate Shift
"""
def __init__(self, input_shape, num_classes, num_domains,
hparams, is_conditional):
super(AbstractCAD, self).__init__(input_shape, num_classes, num_domains, hparams)
self.featurizer = networks.Featurizer(input_shape, self.hparams)
self.classifier = networks.Classifier(
self.featurizer.n_outputs,
num_classes,
self.hparams['nonlinear_classifier'])
params = list(self.featurizer.parameters()) + list(self.classifier.parameters())
# parameters for domain bottleneck loss
self.is_conditional = is_conditional # whether to use bottleneck conditioned on the label
self.base_temperature = 0.07
self.temperature = hparams['temperature']
self.is_project = hparams['is_project'] # whether apply projection head
self.is_normalized = hparams['is_normalized'] # whether apply normalization to representation when computing loss
# whether flip maximize log(p) (False) to minimize -log(1-p) (True) for the bottleneck loss
# the two versions have the same optima, but we find the latter is more stable
self.is_flipped = hparams["is_flipped"]
if self.is_project:
feature_dim = self.featurizer.n_outputs
self.project = nn.Sequential(
nn.Linear(feature_dim, feature_dim),
nn.ReLU(inplace=True),
nn.Linear(feature_dim, 128),
)
params += list(self.project.parameters())
# Optimizers
self.optimizer = torch.optim.Adam(
params,
lr=self.hparams["lr"],
weight_decay=self.hparams['weight_decay']
)
def bn_loss(self, z, y, dom_labels):
"""Contrastive based domain bottleneck loss
The implementation is based on the supervised contrastive loss (SupCon) introduced by
P. Khosla et al. in "Supervised Contrastive Learning".
Modified from https://github.com/HobbitLong/SupContrast/blob/8d0963a7dbb1cd28accb067f5144d61f18a77588/losses.py#L11
"""
device = z.device
batch_size = z.shape[0]
y = y.contiguous().view(-1, 1)
dom_labels = dom_labels.contiguous().view(-1, 1)
mask_y = torch.eq(y, y.T).to(device)
mask_d = (torch.eq(dom_labels, dom_labels.T)).to(device)
mask_drop = ~torch.eye(batch_size).bool().to(device) # drop the "current"/"self" example
mask_y &= mask_drop
mask_y_n_d = mask_y & (~mask_d) # contain the same label but from different domains
mask_y_d = mask_y & mask_d # contain the same label and the same domain
mask_y, mask_drop, mask_y_n_d, mask_y_d = mask_y.float(), mask_drop.float(), mask_y_n_d.float(), mask_y_d.float()
# compute logits
if self.is_project:
z = self.project(z)
if self.is_normalized:
z = F.normalize(z, dim=1)
outer = z @ z.T
logits = outer / self.temperature
logits = logits * mask_drop
# for numerical stability
logits_max, _ = torch.max(logits, dim=1, keepdim=True)
logits = logits - logits_max.detach()
if not self.is_conditional:
# unconditional CAD loss
denominator = torch.logsumexp(logits + mask_drop.log(), dim=1, keepdim=True)
log_prob = logits - denominator
mask_valid = (mask_y.sum(1) > 0)
log_prob = log_prob[mask_valid]
mask_d = mask_d[mask_valid]
if self.is_flipped: # maximize log prob of samples from different domains
bn_loss = - (self.temperature / self.base_temperature) * torch.logsumexp(
log_prob + (~mask_d).float().log(), dim=1)
else: # minimize log prob of samples from same domain
bn_loss = (self.temperature / self.base_temperature) * torch.logsumexp(
log_prob + (mask_d).float().log(), dim=1)
else:
# conditional CAD loss
if self.is_flipped:
mask_valid = (mask_y_n_d.sum(1) > 0)
else:
mask_valid = (mask_y_d.sum(1) > 0)
mask_y = mask_y[mask_valid]
mask_y_d = mask_y_d[mask_valid]
mask_y_n_d = mask_y_n_d[mask_valid]
logits = logits[mask_valid]
# compute log_prob_y with the same label
denominator = torch.logsumexp(logits + mask_y.log(), dim=1, keepdim=True)
log_prob_y = logits - denominator
if self.is_flipped: # maximize log prob of samples from different domains and with same label
bn_loss = - (self.temperature / self.base_temperature) * torch.logsumexp(
log_prob_y + mask_y_n_d.log(), dim=1)
else: # minimize log prob of samples from same domains and with same label
bn_loss = (self.temperature / self.base_temperature) * torch.logsumexp(
log_prob_y + mask_y_d.log(), dim=1)
def finite_mean(x):
# only 1D for now
num_finite = (torch.isfinite(x).float()).sum()
mean = torch.where(torch.isfinite(x), x, torch.tensor(0.0).to(x)).sum()
if num_finite != 0:
mean = mean / num_finite
else:
return torch.tensor(0.0).to(x)
return mean
return finite_mean(bn_loss)
def update(self, minibatches, unlabeled=None):
device = "cuda" if minibatches[0][0].is_cuda else "cpu"
all_x = torch.cat([x for x, y in minibatches])
all_y = torch.cat([y for x, y in minibatches])
all_z = self.featurizer(all_x)
all_d = torch.cat([
torch.full((x.shape[0],), i, dtype=torch.int64, device=device)
for i, (x, y) in enumerate(minibatches)
])
bn_loss = self.bn_loss(all_z, all_y, all_d)
clf_out = self.classifier(all_z)
clf_loss = F.cross_entropy(clf_out, all_y)
total_loss = clf_loss + self.hparams['lmbda'] * bn_loss
self.optimizer.zero_grad()
total_loss.backward()
self.optimizer.step()
return {"clf_loss": clf_loss.item(), "bn_loss": bn_loss.item(), "total_loss": total_loss.item()}
def predict(self, x):
return self.classifier(self.featurizer(x))
class CAD(AbstractCAD):
"""Contrastive Adversarial Domain (CAD) bottleneck
Properties:
- Minimize I(D;Z)
- Require access to domain labels but not task labels
"""
def __init__(self, input_shape, num_classes, num_domains, hparams):
super(CAD, self).__init__(input_shape, num_classes, num_domains, hparams, is_conditional=False)
class CondCAD(AbstractCAD):
"""Conditional Contrastive Adversarial Domain (CAD) bottleneck
Properties:
- Minimize I(D;Z|Y)
- Require access to both domain labels and task labels
"""
def __init__(self, input_shape, num_classes, num_domains, hparams):
super(CondCAD, self).__init__(input_shape, num_classes, num_domains, hparams, is_conditional=True)
class Transfer(Algorithm):
'''Algorithm 1 from "Quantifying and Improving Transferability in Domain Generalization" (https://arxiv.org/abs/2106.03632): encourages transferability among source domains, and thus transferability between source and target.'''
def __init__(self, input_shape, num_classes, num_domains, hparams):
super(Transfer, self).__init__(input_shape, num_classes, num_domains, hparams)
self.register_buffer('update_count', torch.tensor([0]))
self.d_steps_per_g = hparams['d_steps_per_g']
# Architecture
self.featurizer = networks.Featurizer(input_shape, self.hparams)
self.classifier = networks.Classifier(
self.featurizer.n_outputs,
num_classes,
self.hparams['nonlinear_classifier'])
self.adv_classifier = networks.Classifier(
self.featurizer.n_outputs,
num_classes,
self.hparams['nonlinear_classifier'])
self.adv_classifier.load_state_dict(self.classifier.state_dict())
# Optimizers
if self.hparams['gda']:
self.optimizer = torch.optim.SGD(self.adv_classifier.parameters(), lr=self.hparams['lr'])
else:
self.optimizer = torch.optim.Adam(
(list(self.featurizer.parameters()) + list(self.classifier.parameters())),
lr=self.hparams["lr"],
weight_decay=self.hparams['weight_decay'])
self.adv_opt = torch.optim.SGD(self.adv_classifier.parameters(), lr=self.hparams['lr_d'])
def loss_gap(self, minibatches, device):
        '''Compute gap = max_i loss_i(h) - min_j loss_j(h) over the given minibatches and return it.'''
max_env_loss, min_env_loss = torch.tensor([-float('inf')], device=device), torch.tensor([float('inf')], device=device)
for x, y in minibatches:
p = self.adv_classifier(self.featurizer(x))
loss = F.cross_entropy(p, y)
if loss > max_env_loss:
max_env_loss = loss
if loss < min_env_loss:
min_env_loss = loss
return max_env_loss - min_env_loss
def update(self, minibatches, unlabeled=None):
device = "cuda" if minibatches[0][0].is_cuda else "cpu"
# outer loop
all_x = torch.cat([x for x, y in minibatches])
all_y = torch.cat([y for x, y in minibatches])
loss = F.cross_entropy(self.predict(all_x), all_y)
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
del all_x, all_y
gap = self.hparams['t_lambda'] * self.loss_gap(minibatches, device)
self.optimizer.zero_grad()
gap.backward()
self.optimizer.step()
self.adv_classifier.load_state_dict(self.classifier.state_dict())
for _ in range(self.d_steps_per_g):
self.adv_opt.zero_grad()
gap = -self.hparams['t_lambda'] * self.loss_gap(minibatches, device)
gap.backward()
self.adv_opt.step()
self.adv_classifier = proj(self.hparams['delta'], self.adv_classifier, self.classifier)
return {'loss': loss.item(), 'gap': -gap.item()}
def update_second(self, minibatches, unlabeled=None):
device = "cuda" if minibatches[0][0].is_cuda else "cpu"
self.update_count = (self.update_count + 1) % (1 + self.d_steps_per_g)
if self.update_count.item() == 1:
all_x = torch.cat([x for x, y in minibatches])
all_y = torch.cat([y for x, y in minibatches])
loss = F.cross_entropy(self.predict(all_x), all_y)
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
del all_x, all_y
gap = self.hparams['t_lambda'] * self.loss_gap(minibatches, device)
self.optimizer.zero_grad()
gap.backward()
self.optimizer.step()
self.adv_classifier.load_state_dict(self.classifier.state_dict())
return {'loss': loss.item(), 'gap': gap.item()}
else:
self.adv_opt.zero_grad()
gap = -self.hparams['t_lambda'] * self.loss_gap(minibatches, device)
gap.backward()
self.adv_opt.step()
self.adv_classifier = proj(self.hparams['delta'], self.adv_classifier, self.classifier)
return {'gap': -gap.item()}
def predict(self, x):
return self.classifier(self.featurizer(x))
class AbstractCausIRL(ERM):
'''Abstract class for Causality based invariant representation learning algorithm from (https://arxiv.org/abs/2206.11646)'''
def __init__(self, input_shape, num_classes, num_domains, hparams, gaussian):
super(AbstractCausIRL, self).__init__(input_shape, num_classes, num_domains,
hparams)
if gaussian:
self.kernel_type = "gaussian"
else:
self.kernel_type = "mean_cov"
def my_cdist(self, x1, x2):
x1_norm = x1.pow(2).sum(dim=-1, keepdim=True)
x2_norm = x2.pow(2).sum(dim=-1, keepdim=True)
res = torch.addmm(x2_norm.transpose(-2, -1),
x1,
x2.transpose(-2, -1), alpha=-2).add_(x1_norm)
return res.clamp_min_(1e-30)
def gaussian_kernel(self, x, y, gamma=[0.001, 0.01, 0.1, 1, 10, 100,
1000]):
D = self.my_cdist(x, y)
K = torch.zeros_like(D)
for g in gamma:
K.add_(torch.exp(D.mul(-g)))
return K
def mmd(self, x, y):
if self.kernel_type == "gaussian":
Kxx = self.gaussian_kernel(x, x).mean()
Kyy = self.gaussian_kernel(y, y).mean()
Kxy = self.gaussian_kernel(x, y).mean()
return Kxx + Kyy - 2 * Kxy
else:
mean_x = x.mean(0, keepdim=True)
mean_y = y.mean(0, keepdim=True)
cent_x = x - mean_x
cent_y = y - mean_y
cova_x = (cent_x.t() @ cent_x) / (len(x) - 1)
cova_y = (cent_y.t() @ cent_y) / (len(y) - 1)
mean_diff = (mean_x - mean_y).pow(2).mean()
cova_diff = (cova_x - cova_y).pow(2).mean()
return mean_diff + cova_diff
def update(self, minibatches, unlabeled=None):
objective = 0
penalty = 0
nmb = len(minibatches)
features = [self.featurizer(xi) for xi, _ in minibatches]
classifs = [self.classifier(fi) for fi in features]
targets = [yi for _, yi in minibatches]
first = None
second = None
for i in range(nmb):
objective += F.cross_entropy(classifs[i] + 1e-16, targets[i])
            split = np.random.randint(0, len(features[i]))  # avoid shadowing builtin `slice`
            if first is None:
                first = features[i][:split]
                second = features[i][split:]
            else:
                first = torch.cat((first, features[i][:split]), 0)
                second = torch.cat((second, features[i][split:]), 0)
if len(first) > 1 and len(second) > 1:
penalty = torch.nan_to_num(self.mmd(first, second))
else:
penalty = torch.tensor(0)
objective /= nmb
self.optimizer.zero_grad()
(objective + (self.hparams['mmd_gamma']*penalty)).backward()
self.optimizer.step()
if torch.is_tensor(penalty):
penalty = penalty.item()
return {'loss': objective.item(), 'penalty': penalty}
class CausIRL_MMD(AbstractCausIRL):
'''Causality based invariant representation learning algorithm using the MMD distance from (https://arxiv.org/abs/2206.11646)'''
def __init__(self, input_shape, num_classes, num_domains, hparams):
super(CausIRL_MMD, self).__init__(input_shape, num_classes, num_domains,
hparams, gaussian=True)
class CausIRL_CORAL(AbstractCausIRL):
'''Causality based invariant representation learning algorithm using the CORAL distance from (https://arxiv.org/abs/2206.11646)'''
def __init__(self, input_shape, num_classes, num_domains, hparams):
super(CausIRL_CORAL, self).__init__(input_shape, num_classes, num_domains,
hparams, gaussian=False)
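The `gaussian` form of `AbstractCausIRL.mmd` above sums RBF kernels over a fixed bandwidth grid before taking the usual biased MMD² estimate. A minimal pure-Python sketch of that estimator (the names `rbf_mmd2` and `GAMMAS` are illustrative, not part of the codebase; `GAMMAS` mirrors `gaussian_kernel`'s default grid):

```python
import math

# Bandwidth grid mirroring gaussian_kernel's default gammas.
GAMMAS = [0.001, 0.01, 0.1, 1, 10, 100, 1000]

def _k(u, v):
    """Multi-bandwidth RBF kernel on two equal-length vectors."""
    d2 = sum((a - b) ** 2 for a, b in zip(u, v))
    return sum(math.exp(-g * d2) for g in GAMMAS)

def rbf_mmd2(xs, ys):
    """Biased MMD^2 estimate: mean Kxx + mean Kyy - 2 * mean Kxy."""
    kxx = sum(_k(a, b) for a in xs for b in xs) / (len(xs) ** 2)
    kyy = sum(_k(a, b) for a in ys for b in ys) / (len(ys) ** 2)
    kxy = sum(_k(a, b) for a in xs for b in ys) / (len(xs) * len(ys))
    return kxx + kyy - 2 * kxy
```

Identical samples give an MMD² of exactly zero, while well-separated samples give a positive penalty, which is what the training loop scales by `mmd_gamma`.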
class EQRM(ERM):
"""
Empirical Quantile Risk Minimization (EQRM).
Algorithm 1 from [https://arxiv.org/pdf/2207.09944.pdf].
"""
def __init__(self, input_shape, num_classes, num_domains, hparams, dist=None):
super().__init__(input_shape, num_classes, num_domains, hparams)
self.register_buffer('update_count', torch.tensor([0]))
self.register_buffer('alpha', torch.tensor(self.hparams["eqrm_quantile"], dtype=torch.float64))
if dist is None:
self.dist = Nonparametric()
else:
self.dist = dist
def risk(self, x, y):
return F.cross_entropy(self.network(x), y).reshape(1)
def update(self, minibatches, unlabeled=None):
env_risks = torch.cat([self.risk(x, y) for x, y in minibatches])
if self.update_count < self.hparams["eqrm_burnin_iters"]:
# Burn-in/annealing period uses ERM like penalty methods (which set penalty_weight=0, e.g. IRM, VREx.)
loss = torch.mean(env_risks)
else:
# Loss is the alpha-quantile value
self.dist.estimate_parameters(env_risks)
loss = self.dist.icdf(self.alpha)
if self.update_count == self.hparams['eqrm_burnin_iters']:
# Reset Adam (like IRM, VREx, etc.), because it doesn't like the sharp jump in
# gradient magnitudes that happens at this step.
self.optimizer = torch.optim.Adam(
self.network.parameters(),
lr=self.hparams["eqrm_lr"],
weight_decay=self.hparams['weight_decay'])
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
self.update_count += 1
return {'loss': loss.item()}
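EQRM's `update` above switches from the mean of per-environment risks (during burn-in) to their alpha-quantile (after burn-in, via `self.dist.icdf`). A rough sketch of that control flow with a plain empirical quantile in place of the `Nonparametric` distribution estimate (`eqrm_loss` and `empirical_quantile` are illustrative names, and the lower-interpolation quantile here is only an assumption standing in for the paper's smoothed estimator):

```python
import math

def empirical_quantile(values, alpha):
    """Inverse CDF of the empirical distribution (lower interpolation)."""
    s = sorted(values)
    idx = min(len(s) - 1, max(0, math.ceil(alpha * len(s)) - 1))
    return s[idx]

def eqrm_loss(env_risks, alpha, step, burnin_iters):
    """Mean risk during burn-in; alpha-quantile of env risks afterwards."""
    if step < burnin_iters:
        return sum(env_risks) / len(env_risks)
    return empirical_quantile(env_risks, alpha)
```

With `alpha` close to 1 the post-burn-in loss focuses on the worst environments, which is why the code also resets Adam at the burn-in boundary: the loss surface changes sharply at that step.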
class ADRMX(Algorithm):
'''ADRMX: Additive Disentanglement of Domain Features with Remix Loss from (https://arxiv.org/abs/2308.06624)'''
def __init__(self, input_shape, num_classes, num_domains, hparams):
super(ADRMX, self).__init__(input_shape, num_classes, num_domains,
hparams)
self.register_buffer('update_count', torch.tensor([0]))
self.num_classes = num_classes
self.num_domains = num_domains
self.mix_num = 1
self.scl_int = SupConLossLambda(lamda=0.5)
self.scl_final = SupConLossLambda(lamda=0.5)
self.featurizer_label = networks.Featurizer(input_shape, self.hparams)
self.featurizer_domain = networks.Featurizer(input_shape, self.hparams)
self.discriminator = networks.MLP(self.featurizer_domain.n_outputs,
num_domains, self.hparams)
self.classifier_label_1 = networks.Classifier(
self.featurizer_label.n_outputs,
num_classes,
is_nonlinear=True)
self.classifier_label_2 = networks.Classifier(
self.featurizer_label.n_outputs,
num_classes,
is_nonlinear=True)
self.classifier_domain = networks.Classifier(
self.featurizer_domain.n_outputs,
num_domains,
is_nonlinear=True)
self.network = nn.Sequential(self.featurizer_label, self.classifier_label_1)
self.disc_opt = torch.optim.Adam(
(list(self.discriminator.parameters())),
lr=self.hparams["lr"],
betas=(self.hparams['beta1'], 0.9))
self.opt = torch.optim.Adam(
(list(self.featurizer_label.parameters()) +
list(self.featurizer_domain.parameters()) +
list(self.classifier_label_1.parameters()) +
list(self.classifier_label_2.parameters()) +
list(self.classifier_domain.parameters())),
lr=self.hparams["lr"],
betas=(self.hparams['beta1'], 0.9))
def update(self, minibatches, unlabeled=None):
self.update_count += 1
all_x = torch.cat([x for x, _ in minibatches])
all_y = torch.cat([y for _, y in minibatches])
feat_label = self.featurizer_label(all_x)
feat_domain = self.featurizer_domain(all_x)
feat_combined = feat_label - feat_domain
# get domain labels
disc_labels = torch.cat([
torch.full((x.shape[0], ), i, dtype=torch.int64, device=all_x.device)
for i, (x, _) in enumerate(minibatches)
])
# predict domain feats from disentangled features
disc_out = self.discriminator(feat_combined)
        disc_loss = F.cross_entropy(disc_out, disc_labels)  # discriminator loss (descended on disc turns, negated inside gen_loss on generator turns)
d_steps_per_g = self.hparams['d_steps_per_g_step']
# alternating losses
if (self.update_count.item() % (1+d_steps_per_g) < d_steps_per_g):
# in discriminator turn
self.disc_opt.zero_grad()
disc_loss.backward()
self.disc_opt.step()
return {'loss_disc': disc_loss.item()}
else:
# in generator turn
# calculate CE from x_domain
domain_preds = self.classifier_domain(feat_domain)
classifier_loss_domain = F.cross_entropy(domain_preds, disc_labels) # domain clf loss
classifier_remixed_loss = 0
# calculate CE and contrastive loss from x_label
int_preds = self.classifier_label_1(feat_label)
classifier_loss_int = F.cross_entropy(int_preds, all_y) # intermediate CE Loss
cnt_loss_int = self.scl_int(feat_label, all_y, disc_labels)
# calculate CE and contrastive loss from x_dinv
final_preds = self.classifier_label_2(feat_combined)
classifier_loss_final = F.cross_entropy(final_preds, all_y) # final CE Loss
cnt_loss_final = self.scl_final(feat_combined, all_y, disc_labels)
# remix strategy
for i in range(self.num_classes):
indices = torch.where(all_y == i)[0]
for _ in range(self.mix_num):
# get two instances from same class with different domains
perm = torch.randperm(indices.numel())
if len(perm) < 2:
continue
idx1, idx2 = perm[:2]
# remix
remixed_feat = feat_combined[idx1] + feat_domain[idx2]
# make prediction
pred = self.classifier_label_1(remixed_feat.view(1,-1))
# accumulate the loss
classifier_remixed_loss += F.cross_entropy(pred.view(1, -1), all_y[idx1].view(-1))
# normalize
classifier_remixed_loss /= (self.num_classes * self.mix_num)
# generator loss negates the discrimination loss (negative update)
gen_loss = (classifier_loss_int +
classifier_loss_final +
self.hparams["dclf_lambda"] * classifier_loss_domain +
self.hparams["rmxd_lambda"] * classifier_remixed_loss +
self.hparams['cnt_lambda'] * (cnt_loss_int + cnt_loss_final) +
(self.hparams['disc_lambda'] * -disc_loss))
self.disc_opt.zero_grad()
self.opt.zero_grad()
gen_loss.backward()
self.opt.step()
return {'loss_total': gen_loss.item(),
'loss_cnt_int': cnt_loss_int.item(),
'loss_cnt_final': cnt_loss_final.item(),
'loss_clf_int': classifier_loss_int.item(),
'loss_clf_fin': classifier_loss_final.item(),
'loss_dmn': classifier_loss_domain.item(),
'loss_disc': disc_loss.item(),
'loss_remixed': classifier_remixed_loss.item(),
}
def predict(self, x):
return self.network(x)
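Both `Transfer.update_second` and `ADRMX.update` interleave discriminator and generator steps via `update_count % (1 + d_steps_per_g)`; note the two classes orient the cycle differently (`Transfer` treats remainder 1 as the generator turn). A sketch of ADRMX's convention, where the counter is incremented before the check (`is_disc_turn` is an illustrative name):

```python
def is_disc_turn(update_count, d_steps_per_g):
    """ADRMX-style schedule: within every window of (1 + d_steps_per_g)
    consecutive counter values, d_steps_per_g of them are discriminator
    turns and exactly one is a generator turn."""
    return update_count % (1 + d_steps_per_g) < d_steps_per_g
```

For example, with `d_steps_per_g = 2`, counter values 1, 2, 3, 4, 5 map to disc, gen, disc, disc, gen: one generator update per three calls, matching the `1+d_steps_per_g` modulus.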
================================================
FILE: transopt/benchmark/HPOOOD/collect_results.py
================================================
import os
import numpy as np
import json
import pandas as pd
import re
import matplotlib.pyplot as plt
out_put_dir = '/home/cola/transopt_files/output1/results'
analysis_dir = './analysis_res/'
def find_jsonl_files(directory):
jsonl_files = []
    # Walk the given directory and its subdirectories
for root, dirs, files in os.walk(directory):
for file in files:
if file.endswith(".jsonl"):
jsonl_files.append(os.path.join(root, file))
    # Sort by the numeric index embedded in the file path
jsonl_files.sort(key=lambda x: int(re.search(r'/(\d+)_', x).group(1)))
return jsonl_files
def find_dirs(directory):
dir_files = []
    # Walk the given directory and its subdirectories
for root, dirs, files in os.walk(directory):
for dir in dirs:
dir_files.append(os.path.join(root, dir))
return dir_files
def remove_empty_directories(directory):
    # Walk all subdirectories under the given directory, bottom-up
for root, dirs, files in os.walk(directory, topdown=False):
for dir_name in dirs:
dir_path = os.path.join(root, dir_name)
            # Check whether the directory is empty
if not os.listdir(dir_path):
                # Remove it if it is empty
print(f"Removing empty directory: {dir_path}")
os.rmdir(dir_path)
# remove_empty_directories(out_put_dir)
# print(find_dirs(out_put_dir))
def plot_bins(test_data, val_data, save_file_name):
os.makedirs(analysis_dir + 'bins/', exist_ok=True)
plt.clf()
bins = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
plt.hist(test_data, bins=bins, alpha=0.5, label='test acc', color='blue', edgecolor='black')
plt.hist(val_data, bins=bins, alpha=0.5, label='val acc', color='orange', edgecolor='black')
    plt.legend()
    plt.savefig(analysis_dir + 'bins/' + save_file_name)
def plot_traj(test_data, val_data, save_file_name):
os.makedirs(analysis_dir + 'traj/', exist_ok=True)
plt.clf()
# test_data = np.maximum.accumulate(test_data).flatten()
# val_data = np.maximum.accumulate(val_data).flatten()
plt.plot(test_data, label='test acc', color='blue')
plt.plot(val_data, label='val acc', color='orange')
plt.legend()
plt.savefig(analysis_dir + 'traj/' + save_file_name)
def plot_scatter(x, y, values, save_file_name):
os.makedirs(analysis_dir + 'scatter/', exist_ok=True)
plt.clf()
plt.scatter(x, y, s=100, c=values, cmap='Reds', edgecolor='black')
plt.colorbar(label='Value')
    # Set title and labels
plt.title('Scatter Plot with Color Mapping')
plt.savefig(analysis_dir + 'scatter/' + save_file_name)
def print_table(table, header_text, row_labels, col_labels, colwidth=10,
latex=True):
"""Pretty-print a 2D array of data, optionally with row/col labels"""
print("")
if latex:
num_cols = len(table[0])
print("\\begin{center}")
print("\\adjustbox{max width=\\textwidth}{%")
print("\\begin{tabular}{l" + "c" * num_cols + "}")
print("\\toprule")
else:
print("--------", header_text)
for row, label in zip(table, row_labels):
row.insert(0, label)
if latex:
col_labels = ["\\textbf{" + str(col_label).replace("%", "\\%") + "}"
for col_label in col_labels]
table.insert(0, col_labels)
for r, row in enumerate(table):
misc.print_row(row, colwidth=colwidth, latex=latex)
if latex and r == 0:
print("\\midrule")
if latex:
print("\\bottomrule")
print("\\end{tabular}}")
print("\\end{center}")
NN_name = {}
datasets = {}
test_env = [0,1]
for dir_name in find_dirs(out_put_dir):
dir_name = dir_name.split('/')[-1]
# if 'ERM' in dir_name:
# continue
# if 'IRM' in dir_name:
# continue
NN_name[dir_name.split('_')[0]] = 1
datasets[dir_name.split('_')[1]] = 1
df = pd.DataFrame(0, index=list(datasets.keys()), columns=list(NN_name.keys()))
df2 = pd.DataFrame(0, index=list(datasets.keys()), columns=list(NN_name.keys()))
for dir_name in find_dirs(out_put_dir):
# if 'ERM' in dir_name:
# continue
# if 'IRM' in dir_name:
# continue
dir_name = dir_name.split('/')[-1]
nn_name=dir_name.split('_')[0]
dataset_name = dir_name.split('_')[1]
best_val_acc = 0
best_test_acc = 0
best_test_acc2 = 0
all_test = []
all_valid = []
location = []
# if 'ColoredMNIST' in dir_name:
# continue
for file_name in find_jsonl_files(out_put_dir + '/' + dir_name):
# print(file_name)
f_name = file_name.split('/')[-1]
weight_decay = float(f_name.split('_')[-1][:-6])
lr = float(f_name.split('_')[-4])
location.append([lr, weight_decay])
with open(file_name, 'r') as f:
try:
results = json.load(f)
print(results)
val_acc = []
test_acc = []
for t_env in test_env:
for k,v in results.items():
if f'env{t_env}_out_acc' == k:
test_acc.append(v)
for k,v in results.items():
                        m = re.match(r'env(\d+)_val_acc', k)
                        if m:
                            number = int(m.group(1))  # works for env indices >= 10, unlike int(k[3])
if number not in test_env:
val_acc.append(v)
val_acc_mean = np.mean(val_acc)
test_acc_mean = np.mean(test_acc)
all_test.append(test_acc_mean)
all_valid.append(val_acc_mean)
if test_acc_mean > best_test_acc:
best_test_acc = test_acc_mean
if val_acc_mean > best_val_acc:
best_val_acc = val_acc_mean
best_test_acc2 = test_acc_mean
            except Exception:
                print(f'{file_name} could not be parsed')
continue
plot_bins(all_test, all_valid, f'{dir_name}.png')
plot_traj(all_test, all_valid, f'{dir_name}_traj.png')
locations = np.array(location)
plot_scatter(locations[:,0], locations[:,1], all_valid, f'{dir_name}_scatter.png')
df.at[dataset_name, nn_name] = best_test_acc
df2.at[dataset_name, nn_name] = best_test_acc2
print(df)
print('------------------')
print(df2)
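`find_jsonl_files` above sorts result files with `int(re.search(r'/(\d+)_', x).group(1))`, which raises if any path lacks a numeric prefix. A more defensive variant of the same numeric-prefix ordering (the name `sort_by_numeric_prefix` is illustrative): paths without a prefix simply sort last in their original order.

```python
import re

def sort_by_numeric_prefix(paths):
    """Order paths by the integer before the first underscore in the
    basename; paths without such a prefix sort last, stably."""
    def key(p):
        m = re.search(r'(\d+)_', p.rsplit('/', 1)[-1])
        return (0, int(m.group(1))) if m else (1, 0)
    return sorted(paths, key=key)
```

This keeps `2_run.jsonl` before `10_run.jsonl` (numeric, not lexicographic order) without crashing on stray files in the results directory.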
================================================
FILE: transopt/benchmark/HPOOOD/download.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
from collections import defaultdict
from torchvision.datasets import MNIST
import xml.etree.ElementTree as ET
from zipfile import ZipFile
import argparse
import tarfile
import shutil
import gdown
import uuid
import json
import os
import urllib
from wilds.datasets.camelyon17_dataset import Camelyon17Dataset
from wilds.datasets.fmow_dataset import FMoWDataset
# utils #######################################################################
def stage_path(data_dir, name):
full_path = os.path.join(data_dir, name)
if not os.path.exists(full_path):
os.makedirs(full_path)
return full_path
def download_and_extract(url, dst, remove=True):
gdown.download(url, dst, quiet=False)
if dst.endswith(".tar.gz"):
tar = tarfile.open(dst, "r:gz")
tar.extractall(os.path.dirname(dst))
tar.close()
if dst.endswith(".tar"):
tar = tarfile.open(dst, "r:")
tar.extractall(os.path.dirname(dst))
tar.close()
if dst.endswith(".zip"):
zf = ZipFile(dst, "r")
zf.extractall(os.path.dirname(dst))
zf.close()
if remove:
os.remove(dst)
# VLCS ########################################################################
# Slower, but builds dataset from the original sources
#
# def download_vlcs(data_dir):
# full_path = stage_path(data_dir, "VLCS")
#
# tmp_path = os.path.join(full_path, "tmp/")
# if not os.path.exists(tmp_path):
# os.makedirs(tmp_path)
#
# with open("domainbed/misc/vlcs_files.txt", "r") as f:
# lines = f.readlines()
# files = [line.strip().split() for line in lines]
#
# download_and_extract("http://pjreddie.com/media/files/VOCtrainval_06-Nov-2007.tar",
# os.path.join(tmp_path, "voc2007_trainval.tar"))
#
# download_and_extract("https://drive.google.com/uc?id=1I8ydxaAQunz9R_qFFdBFtw6rFTUW9goz",
# os.path.join(tmp_path, "caltech101.tar.gz"))
#
# download_and_extract("http://groups.csail.mit.edu/vision/Hcontext/data/sun09_hcontext.tar",
# os.path.join(tmp_path, "sun09_hcontext.tar"))
#
# tar = tarfile.open(os.path.join(tmp_path, "sun09.tar"), "r:")
# tar.extractall(tmp_path)
# tar.close()
#
# for src, dst in files:
# class_folder = os.path.join(data_dir, dst)
#
# if not os.path.exists(class_folder):
# os.makedirs(class_folder)
#
# dst = os.path.join(class_folder, uuid.uuid4().hex + ".jpg")
#
# if "labelme" in src:
# # download labelme from the web
# gdown.download(src, dst, quiet=False)
# else:
# src = os.path.join(tmp_path, src)
# shutil.copyfile(src, dst)
#
# shutil.rmtree(tmp_path)
def download_vlcs(data_dir):
# Original URL: http://www.eecs.qmul.ac.uk/~dl307/project_iccv2017
full_path = stage_path(data_dir, "VLCS")
download_and_extract("https://drive.google.com/uc?id=1skwblH1_okBwxWxmRsp9_qi15hyPpxg8",
os.path.join(data_dir, "VLCS.tar.gz"))
# MNIST #######################################################################
def download_mnist(data_dir):
# Original URL: http://yann.lecun.com/exdb/mnist/
full_path = stage_path(data_dir, "MNIST")
MNIST(full_path, download=True)
# PACS ########################################################################
def download_pacs(data_dir):
# Original URL: http://www.eecs.qmul.ac.uk/~dl307/project_iccv2017
full_path = stage_path(data_dir, "PACS")
download_and_extract("https://drive.google.com/uc?id=1JFr8f805nMUelQWWmfnJR3y4_SYoN5Pd",
os.path.join(data_dir, "PACS.zip"))
os.rename(os.path.join(data_dir, "kfold"),
full_path)
# Office-Home #################################################################
def download_office_home(data_dir):
# Original URL: http://hemanthdv.org/OfficeHome-Dataset/
full_path = stage_path(data_dir, "office_home")
download_and_extract("https://drive.google.com/uc?id=1uY0pj7oFsjMxRwaD3Sxy0jgel0fsYXLC",
os.path.join(data_dir, "office_home.zip"))
os.rename(os.path.join(data_dir, "OfficeHomeDataset_10072016"),
full_path)
# DomainNET ###################################################################
def download_domain_net(data_dir):
# Original URL: http://ai.bu.edu/M3SDA/
full_path = stage_path(data_dir, "domain_net")
urls = [
"http://csr.bu.edu/ftp/visda/2019/multi-source/groundtruth/clipart.zip",
"http://csr.bu.edu/ftp/visda/2019/multi-source/infograph.zip",
"http://csr.bu.edu/ftp/visda/2019/multi-source/groundtruth/painting.zip",
"http://csr.bu.edu/ftp/visda/2019/multi-source/quickdraw.zip",
"http://csr.bu.edu/ftp/visda/2019/multi-source/real.zip",
"http://csr.bu.edu/ftp/visda/2019/multi-source/sketch.zip"
]
for url in urls:
download_and_extract(url, os.path.join(full_path, url.split("/")[-1]))
with open("domainbed/misc/domain_net_duplicates.txt", "r") as f:
for line in f.readlines():
try:
os.remove(os.path.join(full_path, line.strip()))
except OSError:
pass
# TerraIncognita ##############################################################
def download_terra_incognita(data_dir):
# Original URL: https://beerys.github.io/CaltechCameraTraps/
# New URL: http://lila.science/datasets/caltech-camera-traps
full_path = stage_path(data_dir, "terra_incognita")
download_and_extract(
"https://storage.googleapis.com/public-datasets-lila/caltechcameratraps/eccv_18_all_images_sm.tar.gz",
os.path.join(full_path, "terra_incognita_images.tar.gz"))
download_and_extract(
"https://storage.googleapis.com/public-datasets-lila/caltechcameratraps/eccv_18_annotations.tar.gz",
os.path.join(full_path, "eccv_18_annotations.tar.gz"))
include_locations = ["38", "46", "100", "43"]
include_categories = [
"bird", "bobcat", "cat", "coyote", "dog", "empty", "opossum", "rabbit",
"raccoon", "squirrel"
]
images_folder = os.path.join(full_path, "eccv_18_all_images_sm/")
annotations_folder = os.path.join(full_path,"eccv_18_annotation_files/")
cis_test_annotations_file = os.path.join(full_path, "eccv_18_annotation_files/cis_test_annotations.json")
cis_val_annotations_file = os.path.join(full_path, "eccv_18_annotation_files/cis_val_annotations.json")
train_annotations_file = os.path.join(full_path, "eccv_18_annotation_files/train_annotations.json")
trans_test_annotations_file = os.path.join(full_path, "eccv_18_annotation_files/trans_test_annotations.json")
trans_val_annotations_file = os.path.join(full_path, "eccv_18_annotation_files/trans_val_annotations.json")
annotations_file_list = [cis_test_annotations_file, cis_val_annotations_file, train_annotations_file, trans_test_annotations_file, trans_val_annotations_file]
destination_folder = full_path
stats = {}
data = defaultdict(list)
if not os.path.exists(destination_folder):
os.mkdir(destination_folder)
for annotations_file in annotations_file_list:
annots = {}
with open(annotations_file, "r") as f:
annots = json.load(f)
for k, v in annots.items():
data[k].extend(v)
category_dict = {}
for item in data['categories']:
category_dict[item['id']] = item['name']
for image in data['images']:
image_location = str(image['location'])
if image_location not in include_locations:
continue
loc_folder = os.path.join(destination_folder,
'location_' + str(image_location) + '/')
if not os.path.exists(loc_folder):
os.mkdir(loc_folder)
image_id = image['id']
image_fname = image['file_name']
for annotation in data['annotations']:
if annotation['image_id'] == image_id:
if image_location not in stats:
stats[image_location] = {}
category = category_dict[annotation['category_id']]
if category not in include_categories:
continue
                if category not in stats[image_location]:
                    stats[image_location][category] = 0
                stats[image_location][category] += 1  # count every image, including the first per category
loc_cat_folder = os.path.join(loc_folder, category + '/')
if not os.path.exists(loc_cat_folder):
os.mkdir(loc_cat_folder)
dst_path = os.path.join(loc_cat_folder, image_fname)
src_path = os.path.join(images_folder, image_fname)
shutil.copyfile(src_path, dst_path)
shutil.rmtree(images_folder)
shutil.rmtree(annotations_folder)
# SVIRO #################################################################
def download_sviro(data_dir):
# Original URL: https://sviro.kl.dfki.de
full_path = stage_path(data_dir, "sviro")
download_and_extract("https://sviro.kl.dfki.de/?wpdmdl=1731",
os.path.join(data_dir, "sviro_grayscale_rectangle_classification.zip"))
os.rename(os.path.join(data_dir, "SVIRO_DOMAINBED"),
full_path)
# SPAWRIOUS #############################################################
def download_spawrious(data_dir, remove=True):
dst = os.path.join(data_dir, "spawrious.tar.gz")
urllib.request.urlretrieve('https://www.dropbox.com/s/e40j553480h3f3s/spawrious224.tar.gz?dl=1', dst)
tar = tarfile.open(dst, "r:gz")
tar.extractall(os.path.dirname(dst))
tar.close()
if remove:
os.remove(dst)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Download datasets')
parser.add_argument('--data_dir', type=str, default='/home/cola/transopt_files/data/')
args = parser.parse_args()
# download_mnist(args.data_dir)
# download_pacs(args.data_dir)
# download_office_home(args.data_dir)
# download_domain_net(args.data_dir)
# download_vlcs(args.data_dir)
# download_terra_incognita(args.data_dir)
# download_spawrious(args.data_dir)
# download_sviro(args.data_dir)
# Camelyon17Dataset(root_dir=args.data_dir, download=True)
# FMoWDataset(root_dir=args.data_dir, download=True)
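`download_and_extract` above dispatches on the archive suffix (`.tar.gz`, `.tar`, `.zip`) after downloading. The same dispatch for an already-local file can be sketched with the stdlib alone (`extract_archive` is an illustrative name, not part of the codebase), using context managers so handles are closed even on error:

```python
import os
import tarfile
import zipfile

def extract_archive(path, remove=False):
    """Extract a .tar.gz / .tar / .zip archive next to itself,
    mirroring the suffix dispatch in download_and_extract."""
    dest = os.path.dirname(path) or "."
    if path.endswith(".tar.gz"):
        with tarfile.open(path, "r:gz") as tar:
            tar.extractall(dest)
    elif path.endswith(".tar"):
        with tarfile.open(path, "r:") as tar:
            tar.extractall(dest)
    elif path.endswith(".zip"):
        with zipfile.ZipFile(path, "r") as zf:
            zf.extractall(dest)
    if remove:
        os.remove(path)
```

Note `.tar.gz` must be checked before `.tar` would match as a plain suffix test; the original code avoids this by using independent `if` blocks, which works because `"x.tar.gz".endswith(".tar")` is false.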
================================================
FILE: transopt/benchmark/HPOOOD/fast_data_loader.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import torch
class _InfiniteSampler(torch.utils.data.Sampler):
"""Wraps another Sampler to yield an infinite stream."""
def __init__(self, sampler):
self.sampler = sampler
def __iter__(self):
while True:
for batch in self.sampler:
yield batch
class InfiniteDataLoader:
def __init__(self, dataset, weights, batch_size, num_workers):
super().__init__()
if weights is not None:
sampler = torch.utils.data.WeightedRandomSampler(weights,
replacement=True,
num_samples=batch_size)
else:
sampler = torch.utils.data.RandomSampler(dataset,
replacement=True)
        if weights is None:
            weights = torch.ones(len(dataset))
batch_sampler = torch.utils.data.BatchSampler(
sampler,
batch_size=batch_size,
drop_last=True)
self._infinite_iterator = iter(torch.utils.data.DataLoader(
dataset,
num_workers=num_workers,
batch_sampler=_InfiniteSampler(batch_sampler)
))
def __iter__(self):
while True:
yield next(self._infinite_iterator)
def __len__(self):
raise ValueError
class FastDataLoader:
"""DataLoader wrapper with slightly improved speed by not respawning worker
processes at every epoch."""
def __init__(self, dataset, batch_size, num_workers):
super().__init__()
batch_sampler = torch.utils.data.BatchSampler(
torch.utils.data.RandomSampler(dataset, replacement=False),
batch_size=batch_size,
drop_last=False
)
self._infinite_iterator = iter(torch.utils.data.DataLoader(
dataset,
num_workers=num_workers,
batch_sampler=_InfiniteSampler(batch_sampler)
))
self._length = len(batch_sampler)
def __iter__(self):
for _ in range(len(self)):
yield next(self._infinite_iterator)
def __len__(self):
return self._length
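The key trick in `_InfiniteSampler` is that it re-enters the wrapped sampler each time it is exhausted, so a shuffling sampler produces a fresh permutation every pass (unlike `itertools.cycle`, which would replay the cached first pass). A stdlib sketch of that behavior (`infinite_stream` is an illustrative name; it takes a factory so each pass gets a fresh iterator):

```python
def infinite_stream(make_iter):
    """Yield items forever, restarting the underlying iterable whenever
    it is exhausted, like _InfiniteSampler re-entering its sampler."""
    while True:
        yielded = False
        for item in make_iter():
            yielded = True
            yield item
        if not yielded:  # guard: an empty iterable would loop forever
            return
```

For example, `itertools.islice(infinite_stream(lambda: [1, 2, 3]), 7)` yields `1, 2, 3, 1, 2, 3, 1`, which is why `InfiniteDataLoader.__len__` raises: the stream has no meaningful length.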
================================================
FILE: transopt/benchmark/HPOOOD/hparams_registry.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import numpy as np
def _define_hparam(hparams, hparam_name, default_val, random_val_fn):
hparams[hparam_name] = (hparams, hparam_name, default_val, random_val_fn)
def _hparams(algorithm, dataset, random_seed):
"""
Global registry of hyperparams. Each entry is a (default, random) tuple.
New algorithms / networks / etc. should add entries here.
"""
SMALL_IMAGES = ['Debug28', 'RotatedMNIST', 'ColoredMNIST']
hparams = {}
def _hparam(name, default_val, random_val_fn):
"""Define a hyperparameter. random_val_fn takes a RandomState and
returns a random hyperparameter value."""
assert(name not in hparams)
random_state = np.random.RandomState(random_seed)
hparams[name] = (default_val, random_val_fn(random_state))
# Unconditional hparam definitions.
_hparam('data_augmentation', True, lambda r: True)
_hparam('resnet18', False, lambda r: False)
_hparam('resnet_dropout', 0., lambda r: r.choice([0., 0.1, 0.5]))
_hparam('class_balanced', False, lambda r: False)
# TODO: nonlinear classifiers disabled
_hparam('nonlinear_classifier', False,
lambda r: bool(r.choice([False, False])))
# Algorithm-specific hparam definitions. Each block of code below
# corresponds to exactly one algorithm.
if algorithm in ['DANN', 'CDANN']:
_hparam('lambda', 1.0, lambda r: 10**r.uniform(-2, 2))
_hparam('weight_decay_d', 0., lambda r: 10**r.uniform(-6, -2))
_hparam('d_steps_per_g_step', 1, lambda r: int(2**r.uniform(0, 3)))
_hparam('grad_penalty', 0., lambda r: 10**r.uniform(-2, 1))
_hparam('beta1', 0.5, lambda r: r.choice([0., 0.5]))
_hparam('mlp_width', 256, lambda r: int(2 ** r.uniform(6, 10)))
_hparam('mlp_depth', 3, lambda r: int(r.choice([3, 4, 5])))
_hparam('mlp_dropout', 0., lambda r: r.choice([0., 0.1, 0.5]))
elif algorithm == 'Fish':
_hparam('meta_lr', 0.5, lambda r:r.choice([0.05, 0.1, 0.5]))
elif algorithm == "RDM":
if dataset in ['DomainNet']:
_hparam('rdm_lambda', 0.5, lambda r: r.uniform(0.1, 1.0))
elif dataset in ['PACS', 'TerraIncognita']:
_hparam('rdm_lambda', 5.0, lambda r: r.uniform(1.0, 10.0))
else:
_hparam('rdm_lambda', 5.0, lambda r: r.uniform(0.1, 10.0))
if dataset == 'DomainNet':
_hparam('rdm_penalty_anneal_iters', 2400, lambda r: int(r.uniform(1500, 3000)))
else:
_hparam('rdm_penalty_anneal_iters', 1500, lambda r: int(r.uniform(800, 2700)))
if dataset in ['TerraIncognita', 'OfficeHome', 'DomainNet']:
_hparam('variance_weight', 0.0, lambda r: r.choice([0.0]))
else:
_hparam('variance_weight', 0.004, lambda r: r.uniform(0.001, 0.007))
_hparam('rdm_lr', 1.5e-5, lambda r: r.uniform(8e-6, 2e-5))
elif algorithm == "RSC":
_hparam('rsc_f_drop_factor', 1/3, lambda r: r.uniform(0, 0.5))
_hparam('rsc_b_drop_factor', 1/3, lambda r: r.uniform(0, 0.5))
elif algorithm == "SagNet":
_hparam('sag_w_adv', 0.1, lambda r: 10**r.uniform(-2, 1))
elif algorithm == "IRM":
_hparam('irm_lambda', 1e2, lambda r: 10**r.uniform(-1, 5))
_hparam('irm_penalty_anneal_iters', 500,
lambda r: int(10**r.uniform(0, 4)))
elif algorithm == "Mixup":
_hparam('mixup_alpha', 0.2, lambda r: 10**r.uniform(-1, 1))
elif algorithm == "GroupDRO":
_hparam('groupdro_eta', 1e-2, lambda r: 10**r.uniform(-3, -1))
elif algorithm == "MMD" or algorithm == "CORAL" or algorithm == "CausIRL_CORAL" or algorithm == "CausIRL_MMD":
_hparam('mmd_gamma', 1., lambda r: 10**r.uniform(-1, 1))
elif algorithm == "MLDG":
_hparam('mldg_beta', 1., lambda r: 10**r.uniform(-1, 1))
_hparam('n_meta_test', 2, lambda r: r.choice([1, 2]))
elif algorithm == "MTL":
_hparam('mtl_ema', .99, lambda r: r.choice([0.5, 0.9, 0.99, 1.]))
elif algorithm == "VREx":
_hparam('vrex_lambda', 1e1, lambda r: 10**r.uniform(-1, 5))
_hparam('vrex_penalty_anneal_iters', 500,
lambda r: int(10**r.uniform(0, 4)))
elif algorithm == "SD":
_hparam('sd_reg', 0.1, lambda r: 10**r.uniform(-5, -1))
elif algorithm == "ANDMask":
_hparam('tau', 1, lambda r: r.uniform(0.5, 1.))
elif algorithm == "IGA":
_hparam('penalty', 1000, lambda r: 10**r.uniform(1, 5))
elif algorithm == "SANDMask":
_hparam('tau', 1.0, lambda r: r.uniform(0.0, 1.))
_hparam('k', 1e+1, lambda r: 10**r.uniform(-3, 5))
elif algorithm == "Fishr":
_hparam('lambda', 1000., lambda r: 10**r.uniform(1., 4.))
_hparam('penalty_anneal_iters', 1500, lambda r: int(r.uniform(0., 5000.)))
_hparam('ema', 0.95, lambda r: r.uniform(0.90, 0.99))
elif algorithm == "TRM":
_hparam('cos_lambda', 1e-4, lambda r: 10 ** r.uniform(-5, 0))
_hparam('iters', 200, lambda r: int(10 ** r.uniform(0, 4)))
_hparam('groupdro_eta', 1e-2, lambda r: 10 ** r.uniform(-3, -1))
elif algorithm == "IB_ERM":
_hparam('ib_lambda', 1e2, lambda r: 10**r.uniform(-1, 5))
_hparam('ib_penalty_anneal_iters', 500,
lambda r: int(10**r.uniform(0, 4)))
elif algorithm == "IB_IRM":
_hparam('irm_lambda', 1e2, lambda r: 10**r.uniform(-1, 5))
_hparam('irm_penalty_anneal_iters', 500,
lambda r: int(10**r.uniform(0, 4)))
_hparam('ib_lambda', 1e2, lambda r: 10**r.uniform(-1, 5))
_hparam('ib_penalty_anneal_iters', 500,
lambda r: int(10**r.uniform(0, 4)))
elif algorithm == "CAD" or algorithm == "CondCAD":
_hparam('lmbda', 1e-1, lambda r: r.choice([1e-4, 1e-3, 1e-2, 1e-1, 1, 1e1, 1e2]))
_hparam('temperature', 0.1, lambda r: r.choice([0.05, 0.1]))
_hparam('is_normalized', False, lambda r: False)
_hparam('is_project', False, lambda r: False)
_hparam('is_flipped', True, lambda r: True)
elif algorithm == "Transfer":
_hparam('t_lambda', 1.0, lambda r: 10**r.uniform(-2, 1))
_hparam('delta', 2.0, lambda r: r.uniform(0.1, 3.0))
_hparam('d_steps_per_g', 10, lambda r: int(r.choice([1, 2, 5])))
_hparam('weight_decay_d', 0., lambda r: 10**r.uniform(-6, -2))
_hparam('gda', False, lambda r: True)
_hparam('beta1', 0.5, lambda r: r.choice([0., 0.5]))
_hparam('lr_d', 1e-3, lambda r: 10**r.uniform(-4.5, -2.5))
elif algorithm == 'EQRM':
_hparam('eqrm_quantile', 0.75, lambda r: r.uniform(0.5, 0.99))
_hparam('eqrm_burnin_iters', 2500, lambda r: 10 ** r.uniform(2.5, 3.5))
_hparam('eqrm_lr', 1e-6, lambda r: 10 ** r.uniform(-7, -5))
if algorithm == "ADRMX":
_hparam('cnt_lambda', 1.0, lambda r: r.choice([1.0]))
_hparam('dclf_lambda', 1.0, lambda r: r.choice([1.0]))
_hparam('disc_lambda', 0.75, lambda r: r.choice([0.75]))
_hparam('rmxd_lambda', 1.0, lambda r: r.choice([1.0]))
_hparam('d_steps_per_g_step', 2, lambda r: r.choice([2]))
_hparam('beta1', 0.5, lambda r: r.choice([0.5]))
_hparam('mlp_width', 256, lambda r: r.choice([256]))
_hparam('mlp_depth', 9, lambda r: int(r.choice([8, 9, 10])))
_hparam('mlp_dropout', 0., lambda r: r.choice([0]))
# Dataset-and-algorithm-specific hparam definitions. Each block of code
# below corresponds to exactly one hparam. Avoid nested conditionals.
if dataset in SMALL_IMAGES:
if algorithm == "ADRMX":
_hparam('lr', 3e-3, lambda r: r.choice([5e-4, 1e-3, 2e-3, 3e-3]))
else:
_hparam('lr', 1e-3, lambda r: 10**r.uniform(-4.5, -2.5))
else:
if algorithm == "ADRMX":
_hparam('lr', 3e-5, lambda r: r.choice([2e-5, 3e-5, 4e-5, 5e-5]))
else:
_hparam('lr', 5e-5, lambda r: 10**r.uniform(-5, -3.5))
if dataset in SMALL_IMAGES:
_hparam('weight_decay', 0., lambda r: 0.)
else:
_hparam('weight_decay', 0., lambda r: 10**r.uniform(-6, -2))
if dataset in SMALL_IMAGES:
_hparam('batch_size', 64, lambda r: int(2**r.uniform(3, 9)))
elif algorithm == 'ARM':
_hparam('batch_size', 8, lambda r: 8)
elif algorithm == 'RDM':
if dataset in ['DomainNet', 'TerraIncognita']:
_hparam('batch_size', 40, lambda r: int(r.uniform(30, 60)))
else:
_hparam('batch_size', 88, lambda r: int(r.uniform(70, 100)))
elif dataset == 'DomainNet':
_hparam('batch_size', 32, lambda r: int(2**r.uniform(3, 5)))
else:
_hparam('batch_size', 32, lambda r: int(2**r.uniform(3, 5.5)))
if algorithm in ['DANN', 'CDANN'] and dataset in SMALL_IMAGES:
_hparam('lr_g', 1e-3, lambda r: 10**r.uniform(-4.5, -2.5))
elif algorithm in ['DANN', 'CDANN']:
_hparam('lr_g', 5e-5, lambda r: 10**r.uniform(-5, -3.5))
if algorithm in ['DANN', 'CDANN'] and dataset in SMALL_IMAGES:
_hparam('lr_d', 1e-3, lambda r: 10**r.uniform(-4.5, -2.5))
elif algorithm in ['DANN', 'CDANN']:
_hparam('lr_d', 5e-5, lambda r: 10**r.uniform(-5, -3.5))
if algorithm in ['DANN', 'CDANN'] and dataset in SMALL_IMAGES:
_hparam('weight_decay_g', 0., lambda r: 0.)
elif algorithm in ['DANN', 'CDANN']:
_hparam('weight_decay_g', 0., lambda r: 10**r.uniform(-6, -2))
return hparams
def default_hparams(algorithm, dataset):
return {a: b for a, (b, c) in _hparams(algorithm, dataset, 0).items()}
def random_hparams(algorithm, dataset, seed):
return {a: c for a, (b, c) in _hparams(algorithm, dataset, seed).items()}
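The registry above stores every hyperparameter as a `(default, random_fn)` pair and derives `default_hparams`/`random_hparams` by projecting out one element of each pair. A minimal self-contained sketch of that pattern, assuming a toy registry with two illustrative entries (the `build_hparams` helper and its entries are not part of the codebase):

```python
import numpy as np

def build_hparams(algorithm, seed):
    """Sketch of the (default, random_fn) registry pattern used above."""
    hparams = {}
    random_state = np.random.RandomState(seed)

    def _hparam(name, default_val, random_val_fn):
        # Each entry stores the default and one value drawn from the search prior.
        hparams[name] = (default_val, random_val_fn(random_state))

    _hparam('lr', 5e-5, lambda r: 10 ** r.uniform(-5, -3.5))
    if algorithm == 'IRM':
        _hparam('irm_lambda', 1e2, lambda r: 10 ** r.uniform(-1, 5))
    return hparams

def default_hparams(algorithm):
    return {k: d for k, (d, _) in build_hparams(algorithm, 0).items()}

def random_hparams(algorithm, seed):
    return {k: v for k, (_, v) in build_hparams(algorithm, seed).items()}
```

Because the `RandomState` is rebuilt from the seed on every call, `random_hparams` is deterministic for a fixed `(algorithm, seed)` pair.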
def get_hparams(algorithm, dataset):
return _hparams(algorithm, dataset, 0)
================================================
FILE: transopt/benchmark/HPOOOD/hpoood.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import os
import random
import collections
import time
import json
import shutil
import hashlib
import copy
from torchvision import datasets, transforms
from typing import Dict, Union
from transopt.agent.registry import problem_registry
from transopt.benchmark.problem_base.non_tab_problem import NonTabularProblem
from transopt.optimizer.sampler.random import RandomSampler
from transopt.space.fidelity_space import FidelitySpace
from transopt.space.search_space import SearchSpace
from transopt.space.variable import *
from transopt.benchmark.HPOOOD.hparams_registry import random_hparams, default_hparams, get_hparams
from transopt.benchmark.HPOOOD import ooddatasets
from transopt.benchmark.HPOOOD import misc
from transopt.benchmark.HPOOOD import algorithms
from transopt.benchmark.HPOOOD.fast_data_loader import InfiniteDataLoader, FastDataLoader
def make_record(step, hparams_seed, envs):
"""envs is a list of (in_acc, out_acc, is_test_env) tuples"""
result = {
'args': {'test_envs': [], 'hparams_seed': hparams_seed},
'step': step
}
for i, (in_acc, out_acc, is_test_env) in enumerate(envs):
if is_test_env:
result['args']['test_envs'].append(i)
result[f'env{i}_in_acc'] = in_acc
result[f'env{i}_out_acc'] = out_acc
return result
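`make_record` flattens per-environment `(in_acc, out_acc, is_test_env)` tuples into one flat dict keyed by environment index, collecting the test-environment indices under `args`. A self-contained copy of the function (duplicated here only so the record shape can be checked in isolation):

```python
def make_record(step, hparams_seed, envs):
    """envs is a list of (in_acc, out_acc, is_test_env) tuples, as in the file above."""
    result = {
        'args': {'test_envs': [], 'hparams_seed': hparams_seed},
        'step': step,
    }
    for i, (in_acc, out_acc, is_test_env) in enumerate(envs):
        if is_test_env:
            result['args']['test_envs'].append(i)
        result[f'env{i}_in_acc'] = in_acc
        result[f'env{i}_out_acc'] = out_acc
    return result

# Toy accuracies: env 1 is the held-out test environment.
record = make_record(step=100, hparams_seed=0,
                     envs=[(0.9, 0.8, False), (0.7, 0.6, True)])
```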
class HPOOOD_base(NonTabularProblem):
DATASETS = [
# Small images
"ColoredMNIST",
"RotatedMNIST",
# Big images
"VLCS",
"PACS",
"OfficeHome",
"TerraIncognita",
"DomainNet",
"SVIRO",
# WILDS datasets
"WILDSCamelyon",
"WILDSFMoW",
# Spawrious datasets
"SpawriousO2O_easy",
"SpawriousO2O_medium",
"SpawriousO2O_hard",
"SpawriousM2M_easy",
"SpawriousM2M_medium",
"SpawriousM2M_hard",
]
problem_type = 'hpoood'
num_variables = 10
num_objectives = 1
workloads = []
fidelity = None
ALGORITHMS = [
'ERM',
'Fish',
'IRM',
'GroupDRO',
'Mixup',
'MLDG',
'CORAL',
'MMD',
'DANN',
'CDANN',
'MTL',
'SagNet',
'ARM',
'VREx',
'RSC',
'SD',
'ANDMask',
'SANDMask',
'IGA',
'SelfReg',
"Fishr",
'TRM',
'IB_ERM',
'IB_IRM',
'CAD',
'CondCAD',
'Transfer',
'CausIRL_CORAL',
'CausIRL_MMD',
'EQRM',
'RDM',
'ADRMX',
]
def __init__(
self, task_name, budget_type, budget, seed, workload, algorithm
):
self.dataset_name = HPOOOD_base.DATASETS[workload]
self.algorithm_name = algorithm
self.test_envs = [0,1]
self.data_dir = '/home/cola/transopt_files/data/'
self.output_dir = '/home/cola/transopt_files/output/'
self.holdout_fraction = 0.2
self.validate_fraction = 0.1
self.uda_holdout_fraction = 0.8
self.task = 'domain_generalization'
self.steps = 500
self.checkpoint_freq = 50
self.query = 0
self.save_model_every_checkpoint = False
self.skip_model_save = False
self.trial_seed = seed
self.model_save_dir = self.output_dir + f'models/{self.algorithm_name}_{self.dataset_name}_{seed}/'
self.results_save_dir = self.output_dir + f'results/{self.algorithm_name}_{self.dataset_name}_{seed}/'
print(f"Selected algorithm: {self.algorithm_name}, dataset: {self.dataset_name}")
os.makedirs(self.model_save_dir, exist_ok=True)
os.makedirs(self.results_save_dir, exist_ok=True)
super(HPOOOD_base, self).__init__(
task_name=task_name,
budget=budget,
budget_type=budget_type,
seed=seed,
workload=workload,
)
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
self.hparams = default_hparams(self.algorithm_name, self.dataset_name)
if self.dataset_name in vars(ooddatasets):
self.dataset = vars(ooddatasets)[self.dataset_name](self.data_dir,
self.test_envs, self.hparams)
else:
raise NotImplementedError
in_splits = []
val_splits = []
out_splits = []
uda_splits = []
for env_i, env in enumerate(self.dataset):
uda = []
out, in_ = misc.split_dataset(env,
int(len(env)*self.holdout_fraction),
misc.seed_hash(self.seed, env_i))
val, in_ = misc.split_dataset(in_,
int(len(in_)*self.validate_fraction),
misc.seed_hash(self.seed, env_i))
if env_i in self.test_envs:
uda, in_ = misc.split_dataset(in_,
int(len(in_)*self.uda_holdout_fraction),
misc.seed_hash(self.trial_seed, env_i))
if self.hparams['class_balanced']:
in_weights = misc.make_weights_for_balanced_classes(in_)
val_weights = misc.make_weights_for_balanced_classes(val)
out_weights = misc.make_weights_for_balanced_classes(out)
if uda is not None:
uda_weights = misc.make_weights_for_balanced_classes(uda)
else:
in_weights, val_weights, out_weights, uda_weights = None, None, None, None
in_splits.append((in_, in_weights))
val_splits.append((val, val_weights))
out_splits.append((out, out_weights))
if len(uda):
uda_splits.append((uda, uda_weights))
if self.task == "domain_adaptation" and len(uda_splits) == 0:
raise ValueError("Not enough unlabeled samples for domain adaptation.")
self.train_loaders = [InfiniteDataLoader(
dataset=env,
weights=env_weights,
batch_size=self.hparams['batch_size'],
num_workers=self.dataset.N_WORKERS)
for i, (env, env_weights) in enumerate(in_splits)
if i not in self.test_envs]
self.val_loaders = [InfiniteDataLoader(
dataset=env,
weights=env_weights,
batch_size=self.hparams['batch_size'],
num_workers=self.dataset.N_WORKERS)
for i, (env, env_weights) in enumerate(val_splits)
if i not in self.test_envs]
self.uda_loaders = [InfiniteDataLoader(
dataset=env,
weights=env_weights,
batch_size=self.hparams['batch_size'],
num_workers=self.dataset.N_WORKERS)
for i, (env, env_weights) in enumerate(uda_splits)]
self.eval_loaders = [FastDataLoader(
dataset=env,
batch_size=64,
num_workers=self.dataset.N_WORKERS)
for env, _ in (in_splits + val_splits + out_splits + uda_splits)]
self.eval_weights = [None for _, weights in (in_splits + val_splits + out_splits + uda_splits)]
self.eval_loader_names = ['env{}_in'.format(i)
for i in range(len(in_splits))]
self.eval_loader_names += ['env{}_val'.format(i)
for i in range(len(val_splits))]
self.eval_loader_names += ['env{}_out'.format(i)
for i in range(len(out_splits))]
self.eval_loader_names += ['env{}_uda'.format(i)
for i in range(len(uda_splits))]
self.train_minibatches_iterator = zip(*self.train_loaders)
self.uda_minibatches_iterator = zip(*self.uda_loaders)
self.checkpoint_vals = collections.defaultdict(lambda: [])
self.steps_per_epoch = min([len(env)/self.hparams['batch_size'] for env,_ in in_splits])
if torch.cuda.is_available():
self.device = torch.device(f"cuda:{self.trial_seed}")
else:
self.device = "cpu"
def save_checkpoint(self, filename):
if self.skip_model_save:
return
save_dict = {
"model_input_shape": self.dataset.input_shape,
"model_num_classes": self.dataset.num_classes,
"model_num_domains": len(self.dataset) - len(self.test_envs),
"model_hparams": self.hparams,
"model_dict": self.algorithm.state_dict()
}
torch.save(save_dict, os.path.join(self.model_save_dir, filename))
def get_configuration_space(
self, seed: Union[int, None] = None):
"""
Creates a ConfigSpace.ConfigurationSpace containing all parameters for
the XGBoost Model
Parameters
----------
seed : int, None
Fixing the seed for the ConfigSpace.ConfigurationSpace
Returns
-------
ConfigSpace.ConfigurationSpace
"""
variables=[Continuous('lr', [-8.0, 0.0]),
Continuous('weight_decay', [-10.0, -5.0]),
]
ss = SearchSpace(variables)
self.hparam = ss
return ss
def get_fidelity_space(
self, seed: Union[int, None] = None):
"""
Creates a ConfigSpace.ConfigurationSpace containing all fidelity parameters for
the XGBoost Benchmark
Parameters
----------
seed : int, None
Fixing the seed for the ConfigSpace.ConfigurationSpace
Returns
-------
ConfigSpace.ConfigurationSpace
"""
# return fidel_space
fs = FidelitySpace([])
return fs
def train(self, configuration: dict):
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
self.hparams = default_hparams(self.algorithm_name, self.dataset_name)
self.hparams['lr'] = configuration["lr"]
self.hparams['weight_decay'] = configuration["weight_decay"]
self.steps = configuration['epoch']
n_steps = self.steps or self.dataset.N_STEPS
last_results_keys = None
start_step = 0
for step in range(start_step, n_steps):
step_start_time = time.time()
minibatches_device = [(x.to(self.device), y.to(self.device))
for x,y in next(self.train_minibatches_iterator)]
if self.task == "domain_adaptation":
uda_device = [x.to(self.device)
for x,_ in next(self.uda_minibatches_iterator)]
else:
uda_device = None
step_vals = self.algorithm.update(minibatches_device, uda_device)
self.checkpoint_vals['step_time'].append(time.time() - step_start_time)
for key, val in step_vals.items():
self.checkpoint_vals[key].append(val)
if (step % self.checkpoint_freq == 0) or (step == n_steps - 1):
results = {
'step': step,
'epoch': step / self.steps_per_epoch,
}
for key, val in self.checkpoint_vals.items():
results[key] = np.mean(val)
evals = zip(self.eval_loader_names, self.eval_loaders, self.eval_weights)
for name, loader, weights in evals:
acc = misc.accuracy(self.algorithm, loader, weights, self.device)
results[name+'_acc'] = acc
results['mem_gb'] = torch.cuda.max_memory_allocated() / (1024.*1024.*1024.)
results_keys = sorted(results.keys())
if results_keys != last_results_keys:
misc.print_row(results_keys, colwidth=12)
last_results_keys = results_keys
misc.print_row([results[key] for key in results_keys],
colwidth=12)
results.update({
'hparams': self.hparams,
})
start_step = step + 1
if self.save_model_every_checkpoint:
self.save_checkpoint(f'model_step{step}.pkl')
self.save_checkpoint('model.pkl')
with open(os.path.join(self.model_save_dir, 'done'), 'w') as f:
f.write('done')
return results
def get_score(self, configuration: dict):
algorithm_class = algorithms.get_algorithm_class(self.algorithm_name)
self.algorithm = algorithm_class(self.dataset.input_shape, self.dataset.num_classes,
len(self.dataset) - len(self.test_envs), self.hparams)
self.algorithm.to(self.device)
self.query += 1
results = self.train(configuration)
epochs_path = os.path.join(self.results_save_dir, f"{self.query}_lr_{configuration['lr']}_weight_decay_{configuration['weight_decay']}.jsonl")
with open(epochs_path, 'a') as f:
f.write(json.dumps(results, sort_keys=True) + "\n")
val_acc = [v for k, v in results.items() if 'val' in k]
avg_val_acc = np.mean(val_acc)
test_acc = [v for k, v in results.items() if 'out' in k]
avg_test_acc = np.mean(test_acc)
return avg_val_acc, avg_test_acc
def objective_function(
self,
configuration,
fidelity = None,
seed = None,
**kwargs
) -> Dict:
if 'epoch' in kwargs:
epoch = kwargs['epoch']
else:
epoch = 500
if fidelity is None:
fidelity = {"epoch": epoch, "data_frac": 0.8}
c = {
"lr": np.exp2(configuration["lr"]),
"weight_decay": np.exp2(configuration["weight_decay"]),
"batch_size": 64,
"epoch": fidelity["epoch"],
}
val_acc, test_acc = self.get_score(c)
results = {list(self.objective_info.keys())[0]: float(1 - val_acc)}
for fd_name in self.fidelity_space.fidelity_names:
results[fd_name] = fidelity[fd_name]
return results
def get_objectives(self) -> Dict:
return {'function_value': 'minimize'}
def get_problem_type(self):
return "hpo"
@problem_registry.register("ERMOOD")
class ERMOOD(HPOOOD_base):
def __init__(
self, task_name, budget_type, budget, seed, workload, **kwargs
):
super(ERMOOD, self).__init__(task_name=task_name, budget_type=budget_type, budget=budget, seed = seed, workload = workload, algorithm='ERM')
@problem_registry.register("IRMOOD")
class IRMOOD(HPOOOD_base):
def __init__(
self, task_name, budget_type, budget, seed, workload, **kwargs
):
super(IRMOOD, self).__init__(task_name=task_name, budget_type=budget_type, budget=budget, seed = seed, workload = workload, algorithm='IRM')
@problem_registry.register("ARMOOD")
class ARMOOD(HPOOOD_base):
def __init__(
self, task_name, budget_type, budget, seed, workload, **kwargs
):
super(ARMOOD, self).__init__(task_name=task_name, budget_type=budget_type, budget=budget, seed = seed, workload = workload, algorithm='ARM')
@problem_registry.register("MixupOOD")
class MixupOOD(HPOOOD_base):
def __init__(
self, task_name, budget_type, budget, seed, workload, **kwargs
):
super(MixupOOD, self).__init__(task_name=task_name, budget_type=budget_type, budget=budget, seed = seed, workload = workload, algorithm='Mixup')
@problem_registry.register("DANNOOD")
class DANNOOD(HPOOOD_base):
def __init__(
self, task_name, budget_type, budget, seed, workload, **kwargs
):
super(DANNOOD, self).__init__(task_name=task_name, budget_type=budget_type, budget=budget, seed = seed, workload = workload, algorithm='DANN')
if __name__ == "__main__":
p = MixupOOD(task_name='', budget_type='FEs', budget=100, seed = 0, workload = 2)
configuration = {
"lr": -0.3,
"weight_decay": -5,
}
p.f(configuration=configuration)
================================================
FILE: transopt/benchmark/HPOOOD/misc.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
"""
Things that don't belong anywhere else
"""
import math
import hashlib
import sys
from collections import OrderedDict
from numbers import Number
import operator
import numpy as np
import torch
from collections import Counter
from itertools import cycle
def distance(h1, h2):
''' distance of two networks (h1, h2 are classifiers)'''
dist = 0.
for param in h1.state_dict():
h1_param, h2_param = h1.state_dict()[param], h2.state_dict()[param]
dist += torch.norm(h1_param - h2_param) ** 2 # use Frobenius norms for matrices
return torch.sqrt(dist)
def proj(delta, adv_h, h):
r'''Euclidean projection of adv_h onto the Euclidean ball B(h, \delta); adv_h and h are two classifiers.'''
dist = distance(adv_h, h)
if dist <= delta:
return adv_h
else:
ratio = delta / dist
for param_h, param_adv_h in zip(h.parameters(), adv_h.parameters()):
param_adv_h.data = param_h + ratio * (param_adv_h - param_h)
# print("distance: ", distance(adv_h, h))
return adv_h
def l2_between_dicts(dict_1, dict_2):
assert len(dict_1) == len(dict_2)
dict_1_values = [dict_1[key] for key in sorted(dict_1.keys())]
dict_2_values = [dict_2[key] for key in sorted(dict_1.keys())]
return (
torch.cat(tuple([t.view(-1) for t in dict_1_values])) -
torch.cat(tuple([t.view(-1) for t in dict_2_values]))
).pow(2).mean()
class MovingAverage:
def __init__(self, ema, oneminusema_correction=True):
self.ema = ema
self.ema_data = {}
self._updates = 0
self._oneminusema_correction = oneminusema_correction
def update(self, dict_data):
ema_dict_data = {}
for name, data in dict_data.items():
data = data.view(1, -1)
if self._updates == 0:
previous_data = torch.zeros_like(data)
else:
previous_data = self.ema_data[name]
ema_data = self.ema * previous_data + (1 - self.ema) * data
if self._oneminusema_correction:
# correction by 1/(1 - self.ema)
# so that the gradients amplitude backpropagated in data is independent of self.ema
ema_dict_data[name] = ema_data / (1 - self.ema)
else:
ema_dict_data[name] = ema_data
self.ema_data[name] = ema_data.clone().detach()
self._updates += 1
return ema_dict_data
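With the `1/(1 - ema)` correction, the output of the very first `update` equals the raw input, which is exactly why the backpropagated gradient magnitude does not depend on `ema`. A sketch of the same arithmetic for a single step, with numpy standing in for torch tensors:

```python
import numpy as np

ema = 0.95
previous = np.zeros(3)                 # EMA state before the first update
data = np.array([1.0, 2.0, 4.0])       # toy input

ema_data = ema * previous + (1 - ema) * data
corrected = ema_data / (1 - ema)       # the 1/(1 - ema) correction

# On the first step, corrected == data exactly:
# ema * 0 + (1 - ema) * data, divided by (1 - ema), cancels out.
```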
def make_weights_for_balanced_classes(dataset):
counts = Counter()
classes = []
for _, y in dataset:
y = int(y)
counts[y] += 1
classes.append(y)
n_classes = len(counts)
weight_per_class = {}
for y in counts:
weight_per_class[y] = 1 / (counts[y] * n_classes)
weights = torch.zeros(len(dataset))
for i, y in enumerate(classes):
weights[i] = weight_per_class[int(y)]
return weights
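Each sample receives weight `1 / (class_count * n_classes)`, so every class contributes equal total mass and the weights sum to one. A pure-Python sketch of the same computation on toy labels:

```python
from collections import Counter

labels = [0, 0, 0, 1]        # imbalanced toy labels: 3 of class 0, 1 of class 1
counts = Counter(labels)
n_classes = len(counts)

# Same rule as make_weights_for_balanced_classes: 1 / (count * n_classes).
weights = [1 / (counts[y] * n_classes) for y in labels]
total = sum(weights)         # balanced weights always sum to 1
```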
def pdb():
sys.stdout = sys.__stdout__
import pdb
print("Launching PDB, enter 'n' to step to parent function.")
pdb.set_trace()
def seed_hash(*args):
"""
Derive an integer hash from all args, for use as a random seed.
"""
args_str = str(args)
return int(hashlib.md5(args_str.encode("utf-8")).hexdigest(), 16) % (2**31)
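`seed_hash` derives a stable 31-bit seed from arbitrary arguments via MD5, which is what makes the `(seed, env_i)` splits above reproducible across runs. A self-contained copy for illustration:

```python
import hashlib

def seed_hash(*args):
    """Deterministic 31-bit integer seed from the args (mirrors misc.seed_hash)."""
    args_str = str(args)
    return int(hashlib.md5(args_str.encode("utf-8")).hexdigest(), 16) % (2 ** 31)

s1 = seed_hash(0, "env0")
s2 = seed_hash(0, "env0")   # same args -> same seed
s3 = seed_hash(1, "env0")   # different args -> (almost surely) different seed
```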
def print_separator():
print("="*80)
def print_row(row, colwidth=10, latex=False):
if latex:
sep = " & "
end_ = "\\\\"
else:
sep = " "
end_ = ""
def format_val(x):
if np.issubdtype(type(x), np.floating):
x = "{:.10f}".format(x)
return str(x).ljust(colwidth)[:colwidth]
print(sep.join([format_val(x) for x in row]), end_)
class _SplitDataset(torch.utils.data.Dataset):
"""Used by split_dataset"""
def __init__(self, underlying_dataset, keys):
super(_SplitDataset, self).__init__()
self.underlying_dataset = underlying_dataset
self.keys = keys
def __getitem__(self, key):
return self.underlying_dataset[self.keys[key]]
def __len__(self):
return len(self.keys)
def split_dataset(dataset, n, seed=0):
"""
Return a pair of datasets corresponding to a random split of the given
dataset, with n datapoints in the first dataset and the rest in the last,
using the given random seed
"""
assert(n <= len(dataset))
keys = list(range(len(dataset)))
np.random.RandomState(seed).shuffle(keys)
keys_1 = keys[:n]
keys_2 = keys[n:]
return _SplitDataset(dataset, keys_1), _SplitDataset(dataset, keys_2)
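`split_dataset` shuffles the index list with a seeded `RandomState` and partitions it, so the two halves are disjoint, exhaustive, and deterministic for a given seed. A numpy sketch of just the index logic (the `split_keys` helper is illustrative, not part of the codebase):

```python
import numpy as np

def split_keys(n_total, n_first, seed=0):
    # Mirrors split_dataset's index handling, without the Dataset wrapper.
    keys = list(range(n_total))
    np.random.RandomState(seed).shuffle(keys)
    return keys[:n_first], keys[n_first:]

a, b = split_keys(10, 3, seed=0)
a2, b2 = split_keys(10, 3, seed=0)   # same seed -> identical split
```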
def random_pairs_of_minibatches(minibatches):
perm = torch.randperm(len(minibatches)).tolist()
pairs = []
for i in range(len(minibatches)):
j = i + 1 if i < (len(minibatches) - 1) else 0
xi, yi = minibatches[perm[i]][0], minibatches[perm[i]][1]
xj, yj = minibatches[perm[j]][0], minibatches[perm[j]][1]
min_n = min(len(xi), len(xj))
pairs.append(((xi[:min_n], yi[:min_n]), (xj[:min_n], yj[:min_n])))
return pairs
def split_meta_train_test(minibatches, num_meta_test=1):
n_domains = len(minibatches)
perm = torch.randperm(n_domains).tolist()
pairs = []
meta_train = perm[:(n_domains-num_meta_test)]
meta_test = perm[-num_meta_test:]
for i,j in zip(meta_train, cycle(meta_test)):
xi, yi = minibatches[i][0], minibatches[i][1]
xj, yj = minibatches[j][0], minibatches[j][1]
min_n = min(len(xi), len(xj))
pairs.append(((xi[:min_n], yi[:min_n]), (xj[:min_n], yj[:min_n])))
return pairs
def accuracy(network, loader, weights, device):
correct = 0
total = 0
weights_offset = 0
network.eval()
with torch.no_grad():
for x, y in loader:
x = x.to(device)
y = y.to(device)
p = network.predict(x)
if weights is None:
batch_weights = torch.ones(len(x))
else:
batch_weights = weights[weights_offset : weights_offset + len(x)]
weights_offset += len(x)
batch_weights = batch_weights.to(device)
if p.size(1) == 1:
correct += (p.gt(0).eq(y).float() * batch_weights.view(-1, 1)).sum().item()
else:
correct += (p.argmax(1).eq(y).float() * batch_weights).sum().item()
total += batch_weights.sum().item()
network.train()
return correct / total
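The multi-class branch of `accuracy` reduces to a weighted mean of per-sample correctness. The same reduction on toy arrays, with numpy standing in for the torch tensors (values are made up):

```python
import numpy as np

preds = np.array([0, 1, 1, 0])           # argmax class predictions
labels = np.array([0, 1, 0, 0])          # ground truth; index 2 is wrong
weights = np.array([1.0, 1.0, 2.0, 1.0])  # per-sample weights

correct = ((preds == labels).astype(float) * weights).sum()
total = weights.sum()
acc = correct / total                     # weighted accuracy: 3.0 / 5.0
```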
class Tee:
def __init__(self, fname, mode="a"):
self.stdout = sys.stdout
self.file = open(fname, mode)
def write(self, message):
self.stdout.write(message)
self.file.write(message)
self.flush()
def flush(self):
self.stdout.flush()
self.file.flush()
class ParamDict(OrderedDict):
"""Code adapted from https://github.com/Alok/rl_implementations/tree/master/reptile.
A dictionary where the values are Tensors, meant to represent weights of
a model. This subclass lets you perform arithmetic on weights directly."""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def _prototype(self, other, op):
if isinstance(other, Number):
return ParamDict({k: op(v, other) for k, v in self.items()})
elif isinstance(other, dict):
return ParamDict({k: op(self[k], other[k]) for k in self})
else:
raise NotImplementedError
def __add__(self, other):
return self._prototype(other, operator.add)
def __rmul__(self, other):
return self._prototype(other, operator.mul)
__mul__ = __rmul__
def __neg__(self):
return ParamDict({k: -v for k, v in self.items()})
def __rsub__(self, other):
# a - b := a + (-b)
return self.__add__(other.__neg__())
__sub__ = __rsub__
def __truediv__(self, other):
return self._prototype(other, operator.truediv)
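`ParamDict` lets whole weight dictionaries be combined with ordinary arithmetic, the pattern Reptile-style meta-learning uses for interpolating parameters. A plain-float sketch of the same `_prototype` dispatch (floats stand in for tensors; the class here is a simplified re-implementation, not the one above):

```python
import operator

class ParamDict(dict):
    """Dict supporting elementwise arithmetic, as in misc.ParamDict (floats, not tensors)."""
    def _prototype(self, other, op):
        if isinstance(other, (int, float)):
            return ParamDict({k: op(v, other) for k, v in self.items()})
        if isinstance(other, dict):
            return ParamDict({k: op(self[k], other[k]) for k in self})
        raise NotImplementedError
    def __add__(self, other):
        return self._prototype(other, operator.add)
    def __rmul__(self, other):
        return self._prototype(other, operator.mul)
    __mul__ = __rmul__

w_old = ParamDict({'layer.weight': 1.0, 'layer.bias': -2.0})
w_new = ParamDict({'layer.weight': 3.0, 'layer.bias': 2.0})
# Move old weights halfway toward the new ones: w_old + 0.5 * (w_new - w_old).
w_interp = w_old + 0.5 * (w_new + (-1.0) * w_old)
```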
############################################################
# A general PyTorch implementation of KDE. Builds on:
# https://github.com/EugenHotaj/pytorch-generative/blob/master/pytorch_generative/models/kde.py
############################################################
class Kernel(torch.nn.Module):
"""Base class which defines the interface for all kernels."""
def __init__(self, bw=None):
super().__init__()
self.bw = 0.05 if bw is None else bw
def _diffs(self, test_Xs, train_Xs):
"""Computes difference between each x in test_Xs with all train_Xs."""
test_Xs = test_Xs.view(test_Xs.shape[0], 1, *test_Xs.shape[1:])
train_Xs = train_Xs.view(1, train_Xs.shape[0], *train_Xs.shape[1:])
return test_Xs - train_Xs
def forward(self, test_Xs, train_Xs):
"""Computes p(x) for each x in test_Xs given train_Xs."""
raise NotImplementedError
def sample(self, train_Xs):
"""Generates samples from the kernel distribution."""
raise NotImplementedError
class GaussianKernel(Kernel):
"""Implementation of the Gaussian kernel."""
def forward(self, test_Xs, train_Xs):
diffs = self._diffs(test_Xs, train_Xs)
dims = tuple(range(len(diffs.shape))[2:])
if dims == ():
x_sq = diffs ** 2
else:
x_sq = torch.norm(diffs, p=2, dim=dims) ** 2
var = self.bw ** 2
exp = torch.exp(-x_sq / (2 * var))
coef = 1. / torch.sqrt(2 * np.pi * var)
return (coef * exp).mean(dim=1)
def sample(self, train_Xs):
# device = train_Xs.device
noise = torch.randn(train_Xs.shape) * self.bw
return train_Xs + noise
def cdf(self, test_Xs, train_Xs):
mus = train_Xs # kernel centred on each observation
sigmas = torch.ones(len(mus), device=test_Xs.device) * self.bw # bandwidth = stddev
x_ = test_Xs.repeat(len(mus), 1).T # repeat to allow broadcasting below
return torch.mean(torch.distributions.Normal(mus, sigmas).cdf(x_))
def estimate_bandwidth(x, method="silverman"):
x_, _ = torch.sort(x)
n = len(x_)
sample_std = torch.std(x_, unbiased=True)
if method == 'silverman':
# https://en.wikipedia.org/wiki/Kernel_density_estimation#A_rule-of-thumb_bandwidth_estimator
iqr = torch.quantile(x_, 0.75) - torch.quantile(x_, 0.25)
bandwidth = 0.9 * torch.min(sample_std, iqr / 1.34) * n ** (-0.2)
elif method.lower() == 'gauss-optimal':
bandwidth = 1.06 * sample_std * (n ** -0.2)
else:
raise ValueError(f"Invalid method selected: {method}.")
return bandwidth
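Silverman's rule picks the bandwidth `0.9 * min(std, IQR / 1.34) * n ** (-1/5)`. A numpy sketch of the same formula (numpy's quantile interpolation can differ marginally from `torch.quantile`, so only coarse properties of the result are meaningful):

```python
import numpy as np

def silverman_bandwidth(x):
    # Rule-of-thumb bandwidth: 0.9 * min(sample std, IQR / 1.34) * n ** (-1/5)
    x = np.sort(np.asarray(x, dtype=float))
    n = len(x)
    std = x.std(ddof=1)
    iqr = np.quantile(x, 0.75) - np.quantile(x, 0.25)
    return 0.9 * min(std, iqr / 1.34) * n ** (-0.2)

# For ~200 standard-normal samples the bandwidth lands around 0.3.
bw = silverman_bandwidth(np.random.RandomState(0).normal(size=200))
```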
class KernelDensityEstimator(torch.nn.Module):
"""The KernelDensityEstimator model."""
def __init__(self, train_Xs, kernel='gaussian', bw_select='Gauss-optimal'):
"""Initializes a new KernelDensityEstimator.
Args:
train_Xs: The "training" data to use when estimating probabilities.
kernel: The kernel to place on each of the train_Xs.
"""
super().__init__()
self.train_Xs = train_Xs
self._n_kernels = len(self.train_Xs)
if bw_select is not None:
self.bw = estimate_bandwidth(self.train_Xs, bw_select)
else:
self.bw = None
if kernel.lower() == 'gaussian':
self.kernel = GaussianKernel(self.bw)
else:
raise NotImplementedError(f"'{kernel}' kernel not implemented.")
@property
def device(self):
return self.train_Xs.device
# TODO(eugenhotaj): This method consumes O(train_Xs * x) memory. Implement an iterative version instead.
def forward(self, x):
return self.kernel(x, self.train_Xs)
def sample(self, n_samples):
idxs = np.random.choice(range(self._n_kernels), size=n_samples)
return self.kernel.sample(self.train_Xs[idxs])
def cdf(self, x):
return self.kernel.cdf(x, self.train_Xs)
############################################################
# PyTorch implementation of 1D distributions.
############################################################
EPS = 1e-16
class Distribution1D:
def __init__(self, dist_function=None):
"""
:param dist_function: function to instantiate the distribution (self.dist).
:param parameters: list of parameters in the correct order for dist_function.
"""
self.dist = None
self.dist_function = dist_function
@property
def parameters(self):
raise NotImplementedError
def create_dist(self):
if self.dist_function is not None:
return self.dist_function(*self.parameters)
else:
raise NotImplementedError("No distribution function was specified during intialization.")
def estimate_parameters(self, x):
raise NotImplementedError
def log_prob(self, x):
return self.create_dist().log_prob(x)
def cdf(self, x):
return self.create_dist().cdf(x)
def icdf(self, q):
return self.create_dist().icdf(q)
def sample(self, n=1):
if self.dist is None:
self.dist = self.create_dist()
n_ = torch.Size([]) if n == 1 else (n,)
return self.dist.sample(n_)
def sample_n(self, n=10):
return self.sample(n)
def continuous_bisect_fun_left(f, v, lo, hi, n_steps=32):
val_range = [lo, hi]
k = 0.5 * sum(val_range)
for _ in range(n_steps):
val_range[int(f(k) > v)] = k
next_k = 0.5 * sum(val_range)
if next_k == k:
break
k = next_k
return k
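The bisection helper solves `f(k) = v` for a monotone-increasing `f` on `[lo, hi]`: the `int(f(k) > v)` trick writes the midpoint into either the low (index 0) or high (index 1) end of the bracket. A self-contained copy, used here to invert the square function:

```python
def continuous_bisect_fun_left(f, v, lo, hi, n_steps=32):
    """Bisection for f(k) = v with f increasing on [lo, hi] (mirrors misc)."""
    val_range = [lo, hi]
    k = 0.5 * sum(val_range)
    for _ in range(n_steps):
        # If f(k) overshoots v, k becomes the new upper bound; else the new lower bound.
        val_range[int(f(k) > v)] = k
        next_k = 0.5 * sum(val_range)
        if next_k == k:
            break
        k = next_k
    return k

# Inverting k**2 = 2 on [0, 2] recovers sqrt(2) to bisection precision.
root = continuous_bisect_fun_left(lambda k: k * k, 2.0, 0.0, 2.0)
```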
class Normal(Distribution1D):
def __init__(self, location=0, scale=1):
self.location = location
self.scale = scale
super().__init__(torch.distributions.Normal)
@property
def parameters(self):
return [self.location, self.scale]
def estimate_parameters(self, x):
mean = sum(x) / len(x)
var = sum([(x_i - mean) ** 2 for x_i in x]) / (len(x) - 1)
self.location = mean
self.scale = torch.sqrt(var + EPS)
def icdf(self, q):
if q >= 0:
return super().icdf(q)
else:
# To get q *very* close to 1 without numerical issues, we:
# 1) Use q < 0 to represent log(y), where q = 1 - y.
# 2) Use the inverse-normal-cdf approximation here:
# https://math.stackexchange.com/questions/2964944/asymptotics-of-inverse-of-normal-cdf
log_y = q
return self.location + self.scale * math.sqrt(-2 * log_y)
class Nonparametric(Distribution1D):
def __init__(self, use_kde=True, bw_select='Gauss-optimal'):
self.use_kde = use_kde
self.bw_select = bw_select
self.bw, self.data, self.kde = None, None, None
super().__init__()
@property
def parameters(self):
return []
def estimate_parameters(self, x):
self.data, _ = torch.sort(x)
if self.use_kde:
self.kde = KernelDensityEstimator(self.data, bw_select=self.bw_select)
self.bw = torch.ones(1, device=self.data.device) * self.kde.bw
def icdf(self, q):
if not self.use_kde:
# Empirical or step CDF. Differentiable as torch.quantile uses (linear) interpolation.
return torch.quantile(self.data, float(q))
if q >= 0:
# Find quantile via binary search on the KDE CDF
lo = torch.distributions.Normal(self.data[0], self.bw[0]).icdf(q)
hi = torch.distributions.Normal(self.data[-1], self.bw[-1]).icdf(q)
return continuous_bisect_fun_left(self.kde.cdf, q, lo, hi)
else:
# To get q *very* close to 1 without numerical issues, we:
# 1) Use q < 0 to represent log(y), where q = 1 - y.
# 2) Use the inverse-normal-cdf approximation here:
# https://math.stackexchange.com/questions/2964944/asymptotics-of-inverse-of-normal-cdf
log_y = q
v = torch.mean(self.data + self.bw * math.sqrt(-2 * log_y))
return v
############################################################
# Supervised Contrastive Loss implementation from:
# https://arxiv.org/abs/2004.11362
############################################################
class SupConLossLambda(torch.nn.Module):
def __init__(self, lamda: float=0.5, temperature: float=0.07):
super(SupConLossLambda, self).__init__()
self.temperature = temperature
self.lamda = lamda
def forward(self, features: torch.Tensor, labels: torch.Tensor, domain_labels: torch.Tensor) -> torch.Tensor:
batch_size, _ = features.shape
normalized_features = torch.nn.functional.normalize(features, p=2, dim=1)
# create a lookup table for pairwise dot prods
pairwise_dot_prods = torch.matmul(normalized_features, normalized_features.T)/self.temperature
loss = 0
nans = 0
for i, (label, domain_label) in enumerate(zip(labels, domain_labels)):
            # select positive and negative samples, split by in-domain vs. out-of-domain
cond_pos_in_domain = torch.logical_and(labels==label, domain_labels == domain_label) # take all positives
cond_pos_in_domain[i] = False # exclude itself
cond_pos_out_domain = torch.logical_and(labels==label, domain_labels != domain_label)
cond_neg_in_domain = torch.logical_and(labels!=label, domain_labels == domain_label)
cond_neg_out_domain = torch.logical_and(labels!=label, domain_labels != domain_label)
pos_feats_in_domain = pairwise_dot_prods[cond_pos_in_domain]
pos_feats_out_domain = pairwise_dot_prods[cond_pos_out_domain]
neg_feats_in_domain = pairwise_dot_prods[cond_neg_in_domain]
neg_feats_out_domain = pairwise_dot_prods[cond_neg_out_domain]
            # compute numerator and denominator terms, scaled by lambda
scaled_exp_term = torch.cat((self.lamda * torch.exp(pos_feats_in_domain[:, i]), (1 - self.lamda) * torch.exp(pos_feats_out_domain[:, i])))
scaled_denom_const = torch.sum(torch.cat((self.lamda * torch.exp(neg_feats_in_domain[:, i]), (1 - self.lamda) * torch.exp(neg_feats_out_domain[:, i]), scaled_exp_term))) + 1e-5
            num_positives = pos_feats_in_domain.shape[0] + pos_feats_out_domain.shape[0]  # total number of positive samples
log_fraction = torch.log((scaled_exp_term / scaled_denom_const) + 1e-5) # take log fraction
loss_i = torch.sum(log_fraction) / num_positives
if torch.isnan(loss_i):
nans += 1
continue
loss -= loss_i # sum and average over num positives
        return loss / (batch_size - nans + 1)  # average over non-NaN anchors; +1 guards against division by zero
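`SupConLossLambda` extends the supervised contrastive (SupCon) loss by weighting in-domain and cross-domain positives/negatives with λ and 1 − λ. As a point of reference, here is a hedged, dependency-free sketch of the plain (λ-free) SupCon objective on toy 2-D unit vectors; the names below are illustrative and not part of this module:

```python
import math

def supcon_loss(features, labels, temperature=0.07):
    """Plain supervised contrastive loss for unit-norm feature vectors
    (no domain/lambda weighting), averaged over anchors that have positives."""
    dot = lambda a, b: sum(x * y for x, y in zip(a, b))
    n = len(features)
    total, anchors = 0.0, 0
    for i in range(n):
        positives = [j for j in range(n) if j != i and labels[j] == labels[i]]
        if not positives:
            continue  # no positive pair for this anchor
        denom = sum(math.exp(dot(features[i], features[j]) / temperature)
                    for j in range(n) if j != i)
        for p in positives:
            total -= math.log(
                math.exp(dot(features[i], features[p]) / temperature) / denom
            ) / len(positives)
        anchors += 1
    return total / max(anchors, 1)

# Aligned embeddings (same class -> same direction) give near-zero loss;
# embeddings aligned with the *wrong* class are penalized heavily.
feats = [(1.0, 0.0), (1.0, 0.0), (0.0, 1.0), (0.0, 1.0)]
aligned = supcon_loss(feats, [0, 0, 1, 1])
misaligned = supcon_loss(feats, [0, 1, 0, 1])
assert aligned < misaligned
```

The class above applies exactly this numerator/denominator structure, but scales the in-domain terms by λ and the out-of-domain terms by 1 − λ before summing.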
================================================
FILE: transopt/benchmark/HPOOOD/networks.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models
from transopt.benchmark.HPOOOD import wide_resnet
import copy
def remove_batch_norm_from_resnet(model):
fuse = torch.nn.utils.fusion.fuse_conv_bn_eval
model.eval()
model.conv1 = fuse(model.conv1, model.bn1)
model.bn1 = Identity()
for name, module in model.named_modules():
if name.startswith("layer") and len(name) == 6:
for b, bottleneck in enumerate(module):
for name2, module2 in bottleneck.named_modules():
if name2.startswith("conv"):
bn_name = "bn" + name2[-1]
setattr(bottleneck, name2,
fuse(module2, getattr(bottleneck, bn_name)))
setattr(bottleneck, bn_name, Identity())
if isinstance(bottleneck.downsample, torch.nn.Sequential):
bottleneck.downsample[0] = fuse(bottleneck.downsample[0],
bottleneck.downsample[1])
bottleneck.downsample[1] = Identity()
model.train()
return model
class Identity(nn.Module):
"""An identity layer"""
def __init__(self):
super(Identity, self).__init__()
def forward(self, x):
return x
class MLP(nn.Module):
"""Just an MLP"""
def __init__(self, n_inputs, n_outputs, hparams):
super(MLP, self).__init__()
self.input = nn.Linear(n_inputs, hparams['mlp_width'])
self.dropout = nn.Dropout(hparams['mlp_dropout'])
self.hiddens = nn.ModuleList([
nn.Linear(hparams['mlp_width'], hparams['mlp_width'])
for _ in range(hparams['mlp_depth']-2)])
self.output = nn.Linear(hparams['mlp_width'], n_outputs)
self.n_outputs = n_outputs
def forward(self, x):
x = self.input(x)
x = self.dropout(x)
x = F.relu(x)
for hidden in self.hiddens:
x = hidden(x)
x = self.dropout(x)
x = F.relu(x)
x = self.output(x)
return x
class ResNet(torch.nn.Module):
"""ResNet with the softmax chopped off and the batchnorm frozen"""
def __init__(self, input_shape, hparams):
super(ResNet, self).__init__()
if hparams['resnet18']:
self.network = torchvision.models.resnet18(pretrained=True)
self.n_outputs = 512
else:
self.network = torchvision.models.resnet50(pretrained=True)
self.n_outputs = 2048
# self.network = remove_batch_norm_from_resnet(self.network)
# adapt number of channels
nc = input_shape[0]
if nc != 3:
tmp = self.network.conv1.weight.data.clone()
self.network.conv1 = nn.Conv2d(
nc, 64, kernel_size=(7, 7),
stride=(2, 2), padding=(3, 3), bias=False)
for i in range(nc):
self.network.conv1.weight.data[:, i, :, :] = tmp[:, i % 3, :, :]
# save memory
del self.network.fc
self.network.fc = Identity()
self.freeze_bn()
self.hparams = hparams
self.dropout = nn.Dropout(hparams['resnet_dropout'])
def forward(self, x):
"""Encode x into a feature vector of size n_outputs."""
return self.dropout(self.network(x))
def train(self, mode=True):
"""
Override the default train() to freeze the BN parameters
"""
super().train(mode)
self.freeze_bn()
def freeze_bn(self):
for m in self.network.modules():
if isinstance(m, nn.BatchNorm2d):
m.eval()
class MNIST_CNN(nn.Module):
"""
Hand-tuned architecture for MNIST.
Weirdness I've noticed so far with this architecture:
- adding a linear layer after the mean-pool in features hurts
RotatedMNIST-100 generalization severely.
"""
n_outputs = 128
def __init__(self, input_shape):
super(MNIST_CNN, self).__init__()
self.conv1 = nn.Conv2d(input_shape[0], 64, 3, 1, padding=1)
self.conv2 = nn.Conv2d(64, 128, 3, stride=2, padding=1)
self.conv3 = nn.Conv2d(128, 128, 3, 1, padding=1)
self.conv4 = nn.Conv2d(128, 128, 3, 1, padding=1)
self.bn0 = nn.GroupNorm(8, 64)
self.bn1 = nn.GroupNorm(8, 128)
self.bn2 = nn.GroupNorm(8, 128)
self.bn3 = nn.GroupNorm(8, 128)
self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
def forward(self, x):
x = self.conv1(x)
x = F.relu(x)
x = self.bn0(x)
x = self.conv2(x)
x = F.relu(x)
x = self.bn1(x)
x = self.conv3(x)
x = F.relu(x)
x = self.bn2(x)
x = self.conv4(x)
x = F.relu(x)
x = self.bn3(x)
x = self.avgpool(x)
x = x.view(len(x), -1)
return x
class ContextNet(nn.Module):
def __init__(self, input_shape):
super(ContextNet, self).__init__()
# Keep same dimensions
padding = (5 - 1) // 2
self.context_net = nn.Sequential(
nn.Conv2d(input_shape[0], 64, 5, padding=padding),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.Conv2d(64, 64, 5, padding=padding),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.Conv2d(64, 1, 5, padding=padding),
)
def forward(self, x):
return self.context_net(x)
def Featurizer(input_shape, hparams):
"""Auto-select an appropriate featurizer for the given input shape."""
if len(input_shape) == 1:
return MLP(input_shape[0], hparams["mlp_width"], hparams)
elif input_shape[1:3] == (28, 28):
return MNIST_CNN(input_shape)
elif input_shape[1:3] == (32, 32):
return wide_resnet.Wide_ResNet(input_shape, 16, 2, 0.)
elif input_shape[1:3] == (224, 224):
return ResNet(input_shape, hparams)
else:
raise NotImplementedError
def Classifier(in_features, out_features, is_nonlinear=False):
if is_nonlinear:
return torch.nn.Sequential(
torch.nn.Linear(in_features, in_features // 2),
torch.nn.ReLU(),
torch.nn.Linear(in_features // 2, in_features // 4),
torch.nn.ReLU(),
torch.nn.Linear(in_features // 4, out_features))
else:
return torch.nn.Linear(in_features, out_features)
class WholeFish(nn.Module):
def __init__(self, input_shape, num_classes, hparams, weights=None):
super(WholeFish, self).__init__()
featurizer = Featurizer(input_shape, hparams)
classifier = Classifier(
featurizer.n_outputs,
num_classes,
hparams['nonlinear_classifier'])
self.net = nn.Sequential(
featurizer, classifier
)
if weights is not None:
self.load_state_dict(copy.deepcopy(weights))
def reset_weights(self, weights):
self.load_state_dict(copy.deepcopy(weights))
def forward(self, x):
return self.net(x)
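`ResNet.__init__` adapts a pretrained RGB stem to `nc` input channels by copying channel `i % 3` of the original `conv1` weights into new channel `i`. The indexing pattern can be sketched without torch (the helper name is illustrative):

```python
def cyclic_channel_map(num_channels: int, pretrained_channels: int = 3):
    """For each new input channel, the index of the pretrained channel whose
    weights are copied into it (mirrors `tmp[:, i % 3, :, :]` above)."""
    return [i % pretrained_channels for i in range(num_channels)]

# A 5-channel input reuses the RGB filters cyclically: R, G, B, R, G
assert cyclic_channel_map(5) == [0, 1, 2, 0, 1]
# A 1-channel (grayscale) input just takes the red-channel filters
assert cyclic_channel_map(1) == [0]
```

This keeps the pretrained low-level filters usable even when the input channel count differs from the ImageNet default of 3.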
================================================
FILE: transopt/benchmark/HPOOOD/ooddatasets.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import os
import torch
from PIL import Image, ImageFile
from torchvision import transforms
import torchvision.datasets.folder
from torch.utils.data import TensorDataset, Subset, ConcatDataset, Dataset
from torchvision.datasets import MNIST, ImageFolder
from torchvision.transforms.functional import rotate
from wilds.datasets.camelyon17_dataset import Camelyon17Dataset
from wilds.datasets.fmow_dataset import FMoWDataset
ImageFile.LOAD_TRUNCATED_IMAGES = True
def get_dataset_class(dataset_name):
"""Return the dataset class with the given name."""
if dataset_name not in globals():
raise NotImplementedError("Dataset not found: {}".format(dataset_name))
return globals()[dataset_name]
def num_environments(dataset_name):
return len(get_dataset_class(dataset_name).ENVIRONMENTS)
class MultipleDomainDataset:
N_STEPS = 5001 # Default, subclasses may override
CHECKPOINT_FREQ = 100 # Default, subclasses may override
N_WORKERS = 1 # Default, subclasses may override
ENVIRONMENTS = None # Subclasses should override
INPUT_SHAPE = None # Subclasses should override
def __getitem__(self, index):
return self.datasets[index]
def __len__(self):
return len(self.datasets)
class Debug(MultipleDomainDataset):
def __init__(self, root, test_envs, hparams):
super().__init__()
self.input_shape = self.INPUT_SHAPE
self.num_classes = 2
self.datasets = []
for _ in [0, 1, 2]:
self.datasets.append(
TensorDataset(
torch.randn(16, *self.INPUT_SHAPE),
torch.randint(0, self.num_classes, (16,))
)
)
class Debug28(Debug):
INPUT_SHAPE = (3, 28, 28)
ENVIRONMENTS = ['0', '1', '2']
class Debug224(Debug):
INPUT_SHAPE = (3, 224, 224)
ENVIRONMENTS = ['0', '1', '2']
class MultipleEnvironmentMNIST(MultipleDomainDataset):
def __init__(self, root, environments, dataset_transform, input_shape,
num_classes):
super().__init__()
if root is None:
raise ValueError('Data directory not specified!')
original_dataset_tr = MNIST(root, train=True, download=True)
original_dataset_te = MNIST(root, train=False, download=True)
original_images = torch.cat((original_dataset_tr.data,
original_dataset_te.data))
original_labels = torch.cat((original_dataset_tr.targets,
original_dataset_te.targets))
shuffle = torch.randperm(len(original_images))
original_images = original_images[shuffle]
original_labels = original_labels[shuffle]
self.datasets = []
for i in range(len(environments)):
images = original_images[i::len(environments)]
labels = original_labels[i::len(environments)]
self.datasets.append(dataset_transform(images, labels, environments[i]))
self.input_shape = input_shape
self.num_classes = num_classes
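`MultipleEnvironmentMNIST` splits the shuffled image pool round-robin across environments with the stride expression `original_images[i::len(environments)]`. A small stdlib sketch of that partitioning (function name is illustrative):

```python
def round_robin_split(items, num_envs):
    """Partition `items` into `num_envs` disjoint slices via striding,
    exactly as `images[i::len(environments)]` does above."""
    return [items[i::num_envs] for i in range(num_envs)]

parts = round_robin_split(list(range(10)), 3)
# Every item lands in exactly one environment
assert sorted(x for part in parts for x in part) == list(range(10))
# Environment sizes differ by at most one
sizes = [len(p) for p in parts]
assert max(sizes) - min(sizes) <= 1
```

Because the pool is shuffled first, each environment receives an i.i.d. subsample of roughly equal size.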
class ColoredMNIST(MultipleEnvironmentMNIST):
ENVIRONMENTS = ['+90%', '+80%', '-90%']
def __init__(self, root, test_envs, hparams):
super(ColoredMNIST, self).__init__(root, [0.1, 0.2, 0.9],
self.color_dataset, (2, 28, 28,), 2)
self.input_shape = (2, 28, 28,)
self.num_classes = 2
def color_dataset(self, images, labels, environment):
# # Subsample 2x for computational convenience
# images = images.reshape((-1, 28, 28))[:, ::2, ::2]
# Assign a binary label based on the digit
labels = (labels < 5).float()
# Flip label with probability 0.25
labels = self.torch_xor_(labels,
self.torch_bernoulli_(0.25, len(labels)))
# Assign a color based on the label; flip the color with probability e
colors = self.torch_xor_(labels,
self.torch_bernoulli_(environment,
len(labels)))
images = torch.stack([images, images], dim=1)
# Apply the color to the image by zeroing out the other color channel
images[torch.tensor(range(len(images))), (
1 - colors).long(), :, :] *= 0
x = images.float().div_(255.0)
y = labels.view(-1).long()
return TensorDataset(x, y)
def torch_bernoulli_(self, p, size):
return (torch.rand(size) < p).float()
def torch_xor_(self, a, b):
return (a - b).abs()
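`torch_xor_` implements XOR on {0, 1}-valued tensors as `(a - b).abs()`, which `color_dataset` uses both to flip labels with probability 0.25 and to decorrelate color from label per environment. The identity is easy to check in plain Python:

```python
def float_xor(a: float, b: float) -> float:
    """XOR for values in {0.0, 1.0}, written as |a - b| (as in `torch_xor_`)."""
    return abs(a - b)

# |a - b| agrees with boolean XOR on all four input combinations
for a in (0.0, 1.0):
    for b in (0.0, 1.0):
        assert float_xor(a, b) == float(bool(a) != bool(b))
```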
class RotatedMNIST(MultipleEnvironmentMNIST):
ENVIRONMENTS = ['0', '15', '30', '45', '60', '75']
def __init__(self, root, test_envs, hparams):
super(RotatedMNIST, self).__init__(root, [0, 15, 30, 45, 60, 75],
self.rotate_dataset, (1, 28, 28,), 10)
def rotate_dataset(self, images, labels, angle):
rotation = transforms.Compose([
transforms.ToPILImage(),
transforms.Lambda(lambda x: rotate(x, angle, fill=(0,),
interpolation=torchvision.transforms.InterpolationMode.BILINEAR)),
transforms.ToTensor()])
x = torch.zeros(len(images), 1, 28, 28)
for i in range(len(images)):
x[i] = rotation(images[i])
y = labels.view(-1)
return TensorDataset(x, y)
class MultipleEnvironmentImageFolder(MultipleDomainDataset):
def __init__(self, root, test_envs, augment, hparams):
super().__init__()
environments = [f.name for f in os.scandir(root) if f.is_dir()]
environments = sorted(environments)
transform = transforms.Compose([
transforms.Resize((224,224)),
transforms.ToTensor(),
transforms.Normalize(
mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
augment_transform = transforms.Compose([
# transforms.Resize((224,224)),
transforms.RandomResizedCrop(224, scale=(0.7, 1.0)),
transforms.RandomHorizontalFlip(),
transforms.ColorJitter(0.3, 0.3, 0.3, 0.3),
transforms.RandomGrayscale(),
transforms.ToTensor(),
transforms.Normalize(
mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
self.datasets = []
for i, environment in enumerate(environments):
if augment and (i not in test_envs):
env_transform = augment_transform
else:
env_transform = transform
path = os.path.join(root, environment)
env_dataset = ImageFolder(path,
transform=env_transform)
self.datasets.append(env_dataset)
self.input_shape = (3, 224, 224,)
self.num_classes = len(self.datasets[-1].classes)
class VLCS(MultipleEnvironmentImageFolder):
CHECKPOINT_FREQ = 300
ENVIRONMENTS = ["C", "L", "S", "V"]
def __init__(self, root, test_envs, hparams):
self.dir = os.path.join(root, "VLCS/")
super().__init__(self.dir, test_envs, hparams['data_augmentation'], hparams)
class PACS(MultipleEnvironmentImageFolder):
CHECKPOINT_FREQ = 300
ENVIRONMENTS = ["A", "C", "P", "S"]
def __init__(self, root, test_envs, hparams):
self.dir = os.path.join(root, "PACS/")
super().__init__(self.dir, test_envs, hparams['data_augmentation'], hparams)
class DomainNet(MultipleEnvironmentImageFolder):
CHECKPOINT_FREQ = 1000
ENVIRONMENTS = ["clip", "info", "paint", "quick", "real", "sketch"]
def __init__(self, root, test_envs, hparams):
self.dir = os.path.join(root, "domain_net/")
super().__init__(self.dir, test_envs, hparams['data_augmentation'], hparams)
class OfficeHome(MultipleEnvironmentImageFolder):
CHECKPOINT_FREQ = 300
ENVIRONMENTS = ["A", "C", "P", "R"]
def __init__(self, root, test_envs, hparams):
self.dir = os.path.join(root, "office_home/")
super().__init__(self.dir, test_envs, hparams['data_augmentation'], hparams)
class TerraIncognita(MultipleEnvironmentImageFolder):
CHECKPOINT_FREQ = 300
ENVIRONMENTS = ["L100", "L38", "L43", "L46"]
def __init__(self, root, test_envs, hparams):
self.dir = os.path.join(root, "terra_incognita/")
super().__init__(self.dir, test_envs, hparams['data_augmentation'], hparams)
class SVIRO(MultipleEnvironmentImageFolder):
CHECKPOINT_FREQ = 300
ENVIRONMENTS = ["aclass", "escape", "hilux", "i3", "lexus", "tesla", "tiguan", "tucson", "x5", "zoe"]
def __init__(self, root, test_envs, hparams):
self.dir = os.path.join(root, "sviro/")
super().__init__(self.dir, test_envs, hparams['data_augmentation'], hparams)
class WILDSEnvironment:
def __init__(
self,
wilds_dataset,
metadata_name,
metadata_value,
transform=None):
self.name = metadata_name + "_" + str(metadata_value)
metadata_index = wilds_dataset.metadata_fields.index(metadata_name)
metadata_array = wilds_dataset.metadata_array
subset_indices = torch.where(
metadata_array[:, metadata_index] == metadata_value)[0]
self.dataset = wilds_dataset
self.indices = subset_indices
self.transform = transform
def __getitem__(self, i):
x = self.dataset.get_input(self.indices[i])
if type(x).__name__ != "Image":
x = Image.fromarray(x)
y = self.dataset.y_array[self.indices[i]]
if self.transform is not None:
x = self.transform(x)
return x, y
def __len__(self):
return len(self.indices)
class WILDSDataset(MultipleDomainDataset):
INPUT_SHAPE = (3, 224, 224)
def __init__(self, dataset, metadata_name, test_envs, augment, hparams):
super().__init__()
transform = transforms.Compose([
transforms.Resize((224, 224)),
transforms.ToTensor(),
transforms.Normalize(
mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
augment_transform = transforms.Compose([
transforms.Resize((224, 224)),
transforms.RandomResizedCrop(224, scale=(0.7, 1.0)),
transforms.RandomHorizontalFlip(),
transforms.ColorJitter(0.3, 0.3, 0.3, 0.3),
transforms.RandomGrayscale(),
transforms.ToTensor(),
transforms.Normalize(
mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
self.datasets = []
for i, metadata_value in enumerate(
self.metadata_values(dataset, metadata_name)):
if augment and (i not in test_envs):
env_transform = augment_transform
else:
env_transform = transform
env_dataset = WILDSEnvironment(
dataset, metadata_name, metadata_value, env_transform)
self.datasets.append(env_dataset)
self.input_shape = (3, 224, 224,)
self.num_classes = dataset.n_classes
def metadata_values(self, wilds_dataset, metadata_name):
metadata_index = wilds_dataset.metadata_fields.index(metadata_name)
metadata_vals = wilds_dataset.metadata_array[:, metadata_index]
return sorted(list(set(metadata_vals.view(-1).tolist())))
class WILDSCamelyon(WILDSDataset):
ENVIRONMENTS = [ "hospital_0", "hospital_1", "hospital_2", "hospital_3",
"hospital_4"]
def __init__(self, root, test_envs, hparams):
dataset = Camelyon17Dataset(root_dir=root)
super().__init__(
dataset, "hospital", test_envs, hparams['data_augmentation'], hparams)
class WILDSFMoW(WILDSDataset):
ENVIRONMENTS = [ "region_0", "region_1", "region_2", "region_3",
"region_4", "region_5"]
def __init__(self, root, test_envs, hparams):
dataset = FMoWDataset(root_dir=root)
super().__init__(
dataset, "region", test_envs, hparams['data_augmentation'], hparams)
## Spawrious base classes
class CustomImageFolder(Dataset):
"""
A class that takes one folder at a time and loads a set number of images in a folder and assigns them a specific class
"""
def __init__(self, folder_path, class_index, limit=None, transform=None):
self.folder_path = folder_path
self.class_index = class_index
self.image_paths = [os.path.join(folder_path, img) for img in os.listdir(folder_path) if img.endswith(('.png', '.jpg', '.jpeg'))]
if limit:
self.image_paths = self.image_paths[:limit]
self.transform = transform
def __len__(self):
return len(self.image_paths)
def __getitem__(self, index):
img_path = self.image_paths[index]
img = Image.open(img_path).convert('RGB')
if self.transform:
img = self.transform(img)
label = torch.tensor(self.class_index, dtype=torch.long)
return img, label
class SpawriousBenchmark(MultipleDomainDataset):
ENVIRONMENTS = ["Test", "SC_group_1", "SC_group_2"]
input_shape = (3, 224, 224)
num_classes = 4
class_list = ["bulldog", "corgi", "dachshund", "labrador"]
def __init__(self, train_combinations, test_combinations, root_dir, augment=True, type1=False):
self.type1 = type1
train_datasets, test_datasets = self._prepare_data_lists(train_combinations, test_combinations, root_dir, augment)
self.datasets = [ConcatDataset(test_datasets)] + train_datasets
# Prepares the train and test data lists by applying the necessary transformations.
def _prepare_data_lists(self, train_combinations, test_combinations, root_dir, augment):
test_transforms = transforms.Compose([
transforms.Resize((self.input_shape[1], self.input_shape[2])),
transforms.transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
if augment:
train_transforms = transforms.Compose([
transforms.Resize((self.input_shape[1], self.input_shape[2])),
transforms.RandomHorizontalFlip(),
transforms.ColorJitter(0.3, 0.3, 0.3, 0.3),
transforms.RandomGrayscale(),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
else:
train_transforms = test_transforms
train_data_list = self._create_data_list(train_combinations, root_dir, train_transforms)
test_data_list = self._create_data_list(test_combinations, root_dir, test_transforms)
return train_data_list, test_data_list
# Creates a list of datasets based on the given combinations and transformations.
def _create_data_list(self, combinations, root_dir, transforms):
data_list = []
if isinstance(combinations, dict):
# Build class groups for a given set of combinations, root directory, and transformations.
for_each_class_group = []
cg_index = 0
for classes, comb_list in combinations.items():
for_each_class_group.append([])
for ind, location_limit in enumerate(comb_list):
if isinstance(location_limit, tuple):
location, limit = location_limit
else:
location, limit = location_limit, None
cg_data_list = []
for cls in classes:
path = os.path.join(root_dir, f"{0 if not self.type1 else ind}/{location}/{cls}")
data = CustomImageFolder(folder_path=path, class_index=self.class_list.index(cls), limit=limit, transform=transforms)
cg_data_list.append(data)
for_each_class_group[cg_index].append(ConcatDataset(cg_data_list))
cg_index += 1
for group in range(len(for_each_class_group[0])):
data_list.append(
ConcatDataset(
[for_each_class_group[k][group] for k in range(len(for_each_class_group))]
)
)
else:
for location in combinations:
path = os.path.join(root_dir, f"{0}/{location}/")
data = ImageFolder(root=path, transform=transforms)
data_list.append(data)
return data_list
    # Builds the combination dictionary for one-to-one (O2O) datasets
def build_type1_combination(self,group,test,filler):
total = 3168
counts = [int(0.97*total),int(0.87*total)]
combinations = {}
combinations['train_combinations'] = {
## correlated class
("bulldog",):[(group[0],counts[0]),(group[0],counts[1])],
("dachshund",):[(group[1],counts[0]),(group[1],counts[1])],
("labrador",):[(group[2],counts[0]),(group[2],counts[1])],
("corgi",):[(group[3],counts[0]),(group[3],counts[1])],
## filler
("bulldog","dachshund","labrador","corgi"):[(filler,total-counts[0]),(filler,total-counts[1])],
}
## TEST
combinations['test_combinations'] = {
("bulldog",):[test[0], test[0]],
("dachshund",):[test[1], test[1]],
("labrador",):[test[2], test[2]],
("corgi",):[test[3], test[3]],
}
return combinations
    # Builds the combination dictionary for many-to-many (M2M) datasets
def build_type2_combination(self,group,test):
total = 3168
counts = [total,total]
combinations = {}
combinations['train_combinations'] = {
## correlated class
("bulldog",):[(group[0],counts[0]),(group[1],counts[1])],
("dachshund",):[(group[1],counts[0]),(group[0],counts[1])],
("labrador",):[(group[2],counts[0]),(group[3],counts[1])],
("corgi",):[(group[3],counts[0]),(group[2],counts[1])],
}
combinations['test_combinations'] = {
("bulldog",):[test[0], test[1]],
("dachshund",):[test[1], test[0]],
("labrador",):[test[2], test[3]],
("corgi",):[test[3], test[2]],
}
return combinations
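In `build_type1_combination`, `counts = [int(0.97 * total), int(0.87 * total)]` set how many images per class come from the spuriously correlated background in each of the two training environments, and the `filler` combination tops each environment back up to `total` so the per-class counts stay balanced. The arithmetic, with the same constants:

```python
total = 3168
counts = [int(0.97 * total), int(0.87 * total)]   # correlated-background images
filler = [total - c for c in counts]              # filler-background images

assert counts == [3072, 2756]
assert filler == [96, 412]
# Each training environment still sums to the full per-class total
assert all(c + f == total for c, f in zip(counts, filler))
```

So the "easy"/"medium"/"hard" O2O variants differ only in which backgrounds are paired with which classes, not in these proportions.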
## Spawrious classes for each Spawrious dataset
class SpawriousO2O_easy(SpawriousBenchmark):
def __init__(self, root_dir, test_envs, hparams):
group = ["desert","jungle","dirt","snow"]
test = ["dirt","snow","desert","jungle"]
filler = "beach"
combinations = self.build_type1_combination(group,test,filler)
super().__init__(combinations['train_combinations'], combinations['test_combinations'], root_dir, hparams['data_augmentation'], type1=True)
class SpawriousO2O_medium(SpawriousBenchmark):
def __init__(self, root_dir, test_envs, hparams):
group = ['mountain', 'beach', 'dirt', 'jungle']
test = ['jungle', 'dirt', 'beach', 'snow']
filler = "desert"
combinations = self.build_type1_combination(group,test,filler)
super().__init__(combinations['train_combinations'], combinations['test_combinations'], root_dir, hparams['data_augmentation'], type1=True)
class SpawriousO2O_hard(SpawriousBenchmark):
def __init__(self, root_dir, test_envs, hparams):
group = ['jungle', 'mountain', 'snow', 'desert']
test = ['mountain', 'snow', 'desert', 'jungle']
filler = "beach"
combinations = self.build_type1_combination(group,test,filler)
super().__init__(combinations['train_combinations'], combinations['test_combinations'], root_dir, hparams['data_augmentation'], type1=True)
class SpawriousM2M_easy(SpawriousBenchmark):
def __init__(self, root_dir, test_envs, hparams):
group = ['desert', 'mountain', 'dirt', 'jungle']
test = ['dirt', 'jungle', 'mountain', 'desert']
combinations = self.build_type2_combination(group,test)
super().__init__(combinations['train_combinations'], combinations['test_combinations'], root_dir, hparams['data_augmentation'])
class SpawriousM2M_medium(SpawriousBenchmark):
def __init__(self, root_dir, test_envs, hparams):
group = ['beach', 'snow', 'mountain', 'desert']
test = ['desert', 'mountain', 'beach', 'snow']
combinations = self.build_type2_combination(group,test)
super().__init__(combinations['train_combinations'], combinations['test_combinations'], root_dir, hparams['data_augmentation'])
class SpawriousM2M_hard(SpawriousBenchmark):
ENVIRONMENTS = ["Test","SC_group_1","SC_group_2"]
def __init__(self, root_dir, test_envs, hparams):
group = ["dirt","jungle","snow","beach"]
test = ["snow","beach","dirt","jungle"]
combinations = self.build_type2_combination(group,test)
super().__init__(combinations['train_combinations'], combinations['test_combinations'], root_dir, hparams['data_augmentation'])
================================================
FILE: transopt/benchmark/HPOOOD/wide_resnet.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
"""
From https://github.com/meliketoy/wide-resnet.pytorch
"""
import sys
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
from torch.autograd import Variable
def conv3x3(in_planes, out_planes, stride=1):
return nn.Conv2d(
in_planes,
out_planes,
kernel_size=3,
stride=stride,
padding=1,
bias=True)
def conv_init(m):
classname = m.__class__.__name__
if classname.find('Conv') != -1:
init.xavier_uniform_(m.weight, gain=np.sqrt(2))
init.constant_(m.bias, 0)
elif classname.find('BatchNorm') != -1:
init.constant_(m.weight, 1)
init.constant_(m.bias, 0)
class wide_basic(nn.Module):
def __init__(self, in_planes, planes, dropout_rate, stride=1):
super(wide_basic, self).__init__()
self.bn1 = nn.BatchNorm2d(in_planes)
self.conv1 = nn.Conv2d(
in_planes, planes, kernel_size=3, padding=1, bias=True)
self.dropout = nn.Dropout(p=dropout_rate)
self.bn2 = nn.BatchNorm2d(planes)
self.conv2 = nn.Conv2d(
planes, planes, kernel_size=3, stride=stride, padding=1, bias=True)
self.shortcut = nn.Sequential()
if stride != 1 or in_planes != planes:
self.shortcut = nn.Sequential(
nn.Conv2d(
in_planes, planes, kernel_size=1, stride=stride,
bias=True), )
def forward(self, x):
out = self.dropout(self.conv1(F.relu(self.bn1(x))))
out = self.conv2(F.relu(self.bn2(out)))
out += self.shortcut(x)
return out
class Wide_ResNet(nn.Module):
"""Wide Resnet with the softmax layer chopped off"""
def __init__(self, input_shape, depth, widen_factor, dropout_rate):
super(Wide_ResNet, self).__init__()
self.in_planes = 16
assert ((depth - 4) % 6 == 0), 'Wide-resnet depth should be 6n+4'
n = (depth - 4) / 6
k = widen_factor
# print('| Wide-Resnet %dx%d' % (depth, k))
nStages = [16, 16 * k, 32 * k, 64 * k]
self.conv1 = conv3x3(input_shape[0], nStages[0])
self.layer1 = self._wide_layer(
wide_basic, nStages[1], n, dropout_rate, stride=1)
self.layer2 = self._wide_layer(
wide_basic, nStages[2], n, dropout_rate, stride=2)
self.layer3 = self._wide_layer(
wide_basic, nStages[3], n, dropout_rate, stride=2)
self.bn1 = nn.BatchNorm2d(nStages[3], momentum=0.9)
self.n_outputs = nStages[3]
def _wide_layer(self, block, planes, num_blocks, dropout_rate, stride):
strides = [stride] + [1] * (int(num_blocks) - 1)
layers = []
for stride in strides:
layers.append(block(self.in_planes, planes, dropout_rate, stride))
self.in_planes = planes
return nn.Sequential(*layers)
def forward(self, x):
out = self.conv1(x)
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = F.relu(self.bn1(out))
out = F.avg_pool2d(out, 8)
return out[:, :, 0, 0]
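The `Wide_ResNet` constructor asserts `depth = 6n + 4` (n `wide_basic` blocks per stage, two convs each, across three stages) and widens each stage by `widen_factor`. A sketch of the derived configuration, mirroring the arithmetic above (the helper name is illustrative):

```python
def wrn_config(depth: int, widen_factor: int):
    """Blocks per stage and stage widths for Wide_ResNet(depth, widen_factor)."""
    assert (depth - 4) % 6 == 0, 'Wide-resnet depth should be 6n+4'
    n = (depth - 4) // 6
    k = widen_factor
    stages = [16, 16 * k, 32 * k, 64 * k]
    return n, stages

# The WRN-16-2 that `Featurizer` builds for 32x32 inputs:
n, stages = wrn_config(16, 2)
assert n == 2
assert stages == [16, 32, 64, 128]   # so n_outputs == 128
```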
================================================
FILE: transopt/benchmark/RL/LunarlanderBenchmark.py
================================================
import gym
import logging
import random
import numpy as np
import ConfigSpace as CS
import matplotlib.pyplot as plt
from scipy.stats import pearsonr, spearmanr
from gplearn.genetic import SymbolicRegressor
from typing import Union, Dict
from transopt.benchmark.problem_base import NonTabularProblem
from agent.registry import benchmark_register
logger = logging.getLogger("LunarLanderBenchmark")
# Compute the Pearson correlation coefficient and p-value between two data series
def lunar_lander_simulation(w, print_reward=False, seed=1, dimension=12):
total_reward = 0.0
steps = 0
env_name = "LunarLander-v2"
env = gym.make(env_name)
s = env.reset(seed=seed)[0]
while True:
if dimension == 5:
a = heuristic_controller5d(s, w, is_continuous=False)
# elif dimension == 6:
# a = heuristic_controller6d(s, w, is_continuous=False)
# elif dimension == 8:
# a = heuristic_controller8d(s, w, is_continuous=False)
elif dimension == 10:
a = heuristic_controller10d(s, w, is_continuous=False)
else:
            a = heuristic_controller(s, w)
        s, r, terminated, truncated, _ = env.step(a)
        total_reward += r
        steps += 1
        if terminated or truncated:
            break
if print_reward:
print(f"Total reward: {total_reward}")
return total_reward
def heuristic_controller(s, w, is_continuous=True):
    # w is the array of controller parameters of shape (12,)
angle_target = s[0] * w[0] + s[2] * w[1]
if angle_target > w[2]:
angle_target = w[2]
    if angle_target < -w[2]:
        angle_target = -w[2]
hover_target = w[3] * np.abs(s[0])
angle_todo = (angle_target - s[4]) * w[4] - (s[5]) * w[5]
hover_todo = (hover_target - s[1]) * w[6] - (s[3]) * w[7]
if s[6] or s[7]:
angle_todo = w[8]
hover_todo = -(s[3]) * w[9]
if is_continuous:
a = np.array([hover_todo * 20 - 1, angle_todo * 20])
a = np.clip(a, -1, +1)
else:
a = 0
if hover_todo > np.abs(angle_todo) and hover_todo > w[10]:
a = 2
elif angle_todo < -w[11]:
a = 3
elif angle_todo > +w[11]:
a = 1
return a
def heuristic_controller5d(s, w, is_continuous=True):
    # w is the array of controller parameters of shape (5,)
angle_target = s[0] * w[0] + s[2] * 1.0
if angle_target > 0.4:
angle_target = 0.4
if angle_target < -0.4:
angle_target = -0.4
hover_target = w[1] * np.abs(s[0])
angle_todo = (angle_target - s[4]) * w[2] - (s[5]) * w[3]
hover_todo = (hover_target - s[1]) * w[4] - (s[3]) * 0.5
if s[6] or s[7]:
angle_todo = 0
hover_todo = (
-(s[3]) * 0.5
) # override to reduce fall speed, that's all we need after contact
if is_continuous:
a = np.array([hover_todo * 20 - 1, angle_todo * 20])
a = np.clip(a, -1, +1)
else:
a = 0
if hover_todo > np.abs(angle_todo) and hover_todo > 0.05:
a = 2
elif angle_todo < -0.05:
a = 3
elif angle_todo > +0.05:
a = 1
return a
#
# def heuristic_controller6d(s, w, is_continuous=True):
# # w is the array of controller parameters of shape (1, 12)
# angle_target = s[0] * w[0] + s[2] * w[1]
# if angle_target > 0.4:
# angle_target = 0.4
# if angle_target < -0.4:
# angle_target = -0.4
# hover_target = w[2] * np.abs(s[0])
# angle_todo = (angle_target - s[4]) * w[3] - (s[5]) * w[4]
# hover_todo = (hover_target - s[1]) * w[5] - (s[3]) * 0.5
# if s[6] or s[7]:
# angle_todo = 0
# hover_todo = (
# -(s[3]) * 0.5
# ) # override to reduce fall speed, that's all we need after contact
#
# if is_continuous:
# a = np.array([hover_todo * 20 - 1, angle_todo * 20])
# a = np.clip(a, -1, +1)
# else:
# a = 0
# if hover_todo > np.abs(angle_todo) and hover_todo > 0.05:
# a = 2
# elif angle_todo < -0.05:
# a = 3
# elif angle_todo > +0.05:
# a = 1
# return a
#
#
# def heuristic_controller8d(s, w, is_continuous=True):
# # w is the array of controller parameters of shape (1, 12)
# angle_target = s[0] * w[0] + s[2] * w[1]
# if angle_target > w[2]:
# angle_target = w[2]
# if angle_target < -w[2]:
# angle_target = -w[2]
# hover_target = w[3] * np.abs(s[0])
# angle_todo = (angle_target - s[4]) * w[4] - (s[5]) * w[5]
# hover_todo = (hover_target - s[1]) * w[6] - (s[3]) * w[7]
# if s[6] or s[7]:
# angle_todo = 0
# hover_todo = (
# -(s[3]) * 0.5
# ) # override to reduce fall speed, that's all we need after contact
#
# if is_continuous:
# a = np.array([hover_todo * 20 - 1, angle_todo * 20])
# a = np.clip(a, -1, +1)
# else:
# a = 0
# if hover_todo > np.abs(angle_todo) and hover_todo > 0.05:
# a = 2
# elif angle_todo < -0.05:
# a = 3
# elif angle_todo > +0.05:
# a = 1
# return a
#
def heuristic_controller10d(s, w, is_continuous=True):
# w is the array of 10 controller parameters
angle_target = s[0] * w[0] + s[2] * w[1]
if angle_target > w[2]:
angle_target = w[2]
if angle_target < -w[2]:
angle_target = -w[2]
hover_target = w[3] * np.abs(s[0])
angle_todo = (angle_target - s[4]) * w[4] - (s[5]) * w[5]
hover_todo = (hover_target - s[1]) * w[6] - (s[3]) * w[7]
if s[6] or s[7]:
angle_todo = w[8]
hover_todo = -(s[3]) * w[9]
if is_continuous:
a = np.array([hover_todo * 20 - 1, angle_todo * 20])
a = np.clip(a, -1, +1)
else:
a = 0
if hover_todo > np.abs(angle_todo) and hover_todo > 0.05:
a = 2
elif angle_todo < -0.05:
a = 3
elif angle_todo > +0.05:
a = 1
return a
def vanilla_heuristic(s, is_continuous=False):
angle_targ = s[0] * 0.5 + s[2] * 1.0 # angle should point towards center
if angle_targ > 0.4:
angle_targ = 0.4 # more than 0.4 radians (22 degrees) is bad
if angle_targ < -0.4:
angle_targ = -0.4
hover_targ = 0.55 * np.abs(
s[0]
) # target y should be proportional to horizontal offset
angle_todo = (angle_targ - s[4]) * 0.5 - (s[5]) * 1.0
hover_todo = (hover_targ - s[1]) * 0.5 - (s[3]) * 0.5
if s[6] or s[7]: # legs have contact
angle_todo = 0
hover_todo = (
-(s[3]) * 0.5
) # override to reduce fall speed, that's all we need after contact
if is_continuous:
a = np.array([hover_todo * 20 - 1, -angle_todo * 20])
a = np.clip(a, -1, +1)
else:
a = 0
if hover_todo > np.abs(angle_todo) and hover_todo > 0.05:
a = 2
elif angle_todo < -0.05:
a = 3
elif angle_todo > +0.05:
a = 1
return a
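The discrete branch shared by all of the controllers above maps the two continuous intents (hover_todo, angle_todo) to one of four LunarLander actions. A minimal sketch of that mapping (the function name and the action-semantics comments are illustrative assumptions, not part of the original):

```python
def discrete_action(hover_todo, angle_todo):
    # Mirrors the discrete branch of the heuristic controllers above:
    # fire the main engine when the hover demand dominates,
    # otherwise correct the angle, otherwise do nothing.
    if hover_todo > abs(angle_todo) and hover_todo > 0.05:
        return 2  # main engine
    elif angle_todo < -0.05:
        return 3  # one orientation engine
    elif angle_todo > 0.05:
        return 1  # the other orientation engine
    return 0      # no-op

print(discrete_action(0.5, 0.1))   # hover dominates -> 2
print(discrete_action(0.0, -0.2))  # negative angle correction -> 3
```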
@benchmark_register("Lunar")
class LunarlanderBenchmark(NonTabularProblem):
"""
DixonPrice function
:param sd: standard deviation, to generate noisy evaluations of the function.
"""
lunar_seeds = [2, 3, 4, 5, 10, 14, 15, 19]
def __init__(self, task_name, task_id, budget, seed, task_type="non-tabular"):
super(LunarlanderBenchmark, self).__init__(
task_name=task_name, seed=seed, task_type=task_type, budget=budget
)
self.lunar_seed = LunarlanderBenchmark.lunar_seeds[task_id]
def objective_function(
self,
configuration: Union[CS.Configuration, Dict],
fidelity: Union[Dict, CS.Configuration, None] = None,
seed: Union[np.random.RandomState, int, None] = None,
**kwargs,
) -> Dict:
X = np.array([configuration[k] for k in configuration.keys()])
y = lunar_lander_simulation(X, seed=self.lunar_seed, dimension=self.input_dim)
return {"function_value": float(y), "info": {"fidelity": fidelity}}
def get_configuration_space(
self, seed: Union[int, None] = None
) -> CS.ConfigurationSpace:
"""
Creates a ConfigSpace.ConfigurationSpace containing all parameters for
the lunar lander controller
Parameters
----------
seed : int, None
Fixing the seed for the ConfigSpace.ConfigurationSpace
Returns
-------
ConfigSpace.ConfigurationSpace
"""
seed = seed if seed is not None else np.random.randint(1, 100000)
cs = CS.ConfigurationSpace(seed=seed)
cs.add_hyperparameters(
[
CS.UniformFloatHyperparameter(f"x{i}", lower=0, upper=2.0)
for i in range(10)
]
)
return cs
def get_fidelity_space(
self, seed: Union[int, None] = None
) -> CS.ConfigurationSpace:
"""
Creates a ConfigSpace.ConfigurationSpace containing all fidelity parameters for
the lunar lander benchmark
Parameters
----------
seed : int, None
Fixing the seed for the ConfigSpace.ConfigurationSpace
Returns
-------
ConfigSpace.ConfigurationSpace
"""
seed = seed if seed is not None else np.random.randint(1, 100000)
fidel_space = CS.ConfigurationSpace(seed=seed)
fidel_space.add_hyperparameters([])
return fidel_space
def get_meta_information(self) -> Dict:
    return {}
if __name__ == "__main__":
seed_list = [2, 3, 4, 5, 10, 14, 15, 19]
result_vectors = []
for seed in seed_list:
# Set the random seed
np.random.seed(seed)
# Evaluate the function 100 times and record the results
sample_number = 100
dim = 10
fixed_dims = {0: 2.0, 1: 1.8, 2: 0.01, 4: 0.01, 5: 0.01}
# Generate random data for other dimensions
samples_x = np.random.uniform(0, 2, (sample_number, dim))  # match the [0, 2] bounds of the search space
# Assign fixed values to specified dimensions
# for dim, value in fixed_dims.items():
# samples_x[:, dim] = value
# samples_x= np.random.uniform(0, 2, size=(sample_number, dim))
# samples_x = np.sort(samples_x, axis=0)
# samples_x = np.random.uniform(0, 2, size=(100, 10))
bench = LunarlanderBenchmark(task_name="lunar", task_id=0, seed=0, budget=10000)
# Evaluate every sample, not just the first one
results = []
for s_idx in range(sample_number):
    xx = {f"x{i}": samples_x[s_idx][i] for i in range(dim)}
    results.append(bench.f(xx)["function_value"])
print(results)
# Convert the results into a sample_number x 1 vector
result_vector = np.array(results).reshape(-1, 1)
# Store the result vector in the list
result_vectors.append(result_vector)
plt.figure()
plt.clf()
# Plot the distribution of the sampled results
plt.hist(results, bins=30, density=True, alpha=0.7)
# Add axis labels and a title
plt.xlabel("Value")
plt.ylabel("Density")
plt.title(f"Distribution of Sampled Function, seed:{seed}")
plt.show()
# plt.savefig(f'seed_{seed}')
# Fit a symbolic regressor
est_gp = SymbolicRegressor(
population_size=5000,
generations=20,
stopping_criteria=0.01,
p_crossover=0.7,
p_subtree_mutation=0.1,
p_hoist_mutation=0.05,
p_point_mutation=0.1,
max_samples=0.9,
verbose=1,
parsimony_coefficient=0.01,
random_state=0,
)
est_gp.fit(samples_x, result_vector.ravel())
print("Best program:", est_gp._program)
# Correlation analysis between each pair of result vectors
for i, vector1 in enumerate(result_vectors):
for j, vector2 in enumerate(result_vectors):
if i != j:
correlation, p = spearmanr(vector1.flatten(), vector2.flatten())
print(
f"Correlation between seed {seed_list[i]} and seed {seed_list[j]}: {correlation},p_{p}"
)
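The cross-seed comparison above uses Spearman's rank correlation. As a self-contained illustration of what that statistic measures, here is a from-scratch sketch (the `spearman_rho` helper is illustrative, not the scipy implementation used above, and this simple ranking assumes no ties):

```python
import numpy as np

def spearman_rho(x, y):
    # Spearman's rho is the Pearson correlation of the ranks.
    rx = np.argsort(np.argsort(x)).astype(float)
    ry = np.argsort(np.argsort(y)).astype(float)
    rx -= rx.mean()
    ry -= ry.mean()
    return float((rx @ ry) / np.sqrt((rx @ rx) * (ry @ ry)))

# Any monotonically increasing relation scores +1, decreasing scores -1
print(spearman_rho([1, 2, 3, 4], [10, 20, 30, 40]))  # 1.0
print(spearman_rho([1, 2, 3, 4], [8, 6, 4, 2]))      # -1.0
```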
================================================
FILE: transopt/benchmark/RL/__init__.py
================================================
================================================
FILE: transopt/benchmark/__init__.py
================================================
# from transopt.benchmark.instantiate_problems import InstantiateProblems
================================================
FILE: transopt/benchmark/instantiate_problems.py
================================================
from transopt.agent.registry import problem_registry
# from transopt.benchmark.problem_base.tab_problem import TabularProblem
from transopt.benchmark.problem_base.transfer_problem import TransferProblem, RemoteTransferOptBenchmark
def InstantiateProblems(
tasks: dict = None, seed: int = 0, remote: bool = False, server_url: str = None
) -> TransferProblem:
tasks = tasks or {}
if remote:
if server_url is None:
raise ValueError("Server URL must be provided for remote testing.")
transfer_problems = RemoteTransferOptBenchmark(server_url, seed)
else:
transfer_problems = TransferProblem(seed)
for task_name, task_params in tasks.items():
budget = task_params.get("budget", 0)
workloads = task_params.get("workloads", [])
budget_type = task_params.get("budget_type", 'Num_FEs')
params = task_params.get("params", {})
problem_cls = problem_registry[task_name]
if problem_cls is None:
raise KeyError(f"Task '{task_name}' not found in the problem registry.")
for idx, workload in enumerate(workloads):
problem = problem_cls(
task_name=f"{task_name}",
task_id=idx,
budget_type=budget_type,
budget=budget,
seed=seed,
workload=workload,
params=params,
)
transfer_problems.add_task(problem)
return transfer_problems
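`InstantiateProblems` expects a tasks dict keyed by a registered problem name, with per-task settings. A minimal sketch of the expected shape and the defaulting logic applied above (the "Lunar" task name follows the registration seen elsewhere in the repository; the in-memory `parsed` list stands in for the real registry lookup and problem construction):

```python
tasks = {
    "Lunar": {
        "budget": 100,
        "workloads": [0, 1, 2],  # one sub-task is created per workload
        "params": {},
        # "budget_type" omitted on purpose: it defaults to 'Num_FEs'
    }
}

parsed = []
for task_name, task_params in tasks.items():
    budget = task_params.get("budget", 0)
    workloads = task_params.get("workloads", [])
    budget_type = task_params.get("budget_type", "Num_FEs")
    for idx, workload in enumerate(workloads):
        parsed.append((task_name, idx, workload, budget, budget_type))

print(parsed[0])  # ('Lunar', 0, 0, 100, 'Num_FEs')
```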
================================================
FILE: transopt/benchmark/problem_base/__init__.py
================================================
# from benchmark.problem_base.base import ProblemBase
# from benchmark.problem_base.non_tab_problem import NonTabularProblem
# from benchmark.problem_base.tab_problem import TabularProblem
# from benchmark.problem_base.transfer_problem import TransferProblem, RemoteTransferOptBenchmark
================================================
FILE: transopt/benchmark/problem_base/base.py
================================================
""" Base-class of all benchmarks """
import abc
import logging
from numpy.random.mtrand import RandomState as RandomState
from transopt.space.search_space import SearchSpace
from transopt.space.fidelity_space import FidelitySpace
import numpy as np
from typing import Union, Dict
from transopt.space.variable import *
logger = logging.getLogger("AbstractProblem")
class ProblemBase(abc.ABC):
def __init__(self, seed: Union[int, np.random.RandomState, None] = None, **kwargs):
"""
Interface for benchmarks.
A benchmark consists of two building blocks, the target function and
the configuration space. Furthermore it can contain additional
benchmark-specific information such as the location and the function
value of the global optima.
New benchmarks should be derived from this base class or one of its
child classes.
Parameters
----------
seed: int, np.random.RandomState, None
The default random state for the benchmark. If type is int, a
np.random.RandomState seeded with `seed` is created. If type is None,
a new random state is created.
"""
self.seed = seed
self.fidelity_space = self.get_fidelity_space()
self.objective_info = self.get_objectives()
self.problem_type = self.get_problem_type()
self.configuration_space = self.get_configuration_space()
self.input_dim = len(self.configuration_space.get_hyperparameter_names())
self.num_objective = len(self.objective_info)
def f(self, configuration, fidelity=None, seed=None, **kwargs) -> Dict:
# Check validity of configuration and fidelity before evaluation
self.check_validity(configuration, fidelity)
# Delegate to the specific evaluation method implemented by subclasses
return self.objective_function(configuration, fidelity, seed, **kwargs)
@abc.abstractmethod
def objective_function(
self,
configuration: Dict,
fidelity: Dict = None,
seed: Union[np.random.RandomState, int, None] = None,
**kwargs,
) -> Dict:
"""Implement this method in subclasses to define specific evaluation logic."""
raise NotImplementedError
@abc.abstractmethod
def get_configuration_space(self) -> SearchSpace:
"""Defines the configuration space for each benchmark.
Parameters
----------
seed: int, None
Seed for the configuration space.
Returns
-------
ConfigSpace.ConfigurationSpace
A valid configuration space for the benchmark's parameters
"""
raise NotImplementedError()
def check_validity(self, configuration, fidelity):
# Check if each configuration key and value is valid
for key, value in configuration.items():
if key not in self.configuration_space.ranges:
raise ValueError(f"Configuration key {key} is not valid.")
variable = self.configuration_space.get_design_variable(key)
if isinstance(variable, Categorical):
    if value not in variable.categories:
        raise ValueError(
            f"Value of {key}={value} is not among the allowed categories {variable.categories}."
        )
else:
    design_range = variable.range
    if not (design_range[0] <= value <= design_range[1]):
        raise ValueError(
            f"Value of {key}={value} is out of allowed range {design_range}."
        )
if fidelity is None:
return
# Check if each fidelity key and value is valid
for key, value in fidelity.items():
if key not in self.fidelity_space.ranges:
raise ValueError(f"Fidelity key {key} is not valid.")
fid_range = self.fidelity_space.ranges[key]
if not (fid_range[0] <= value <= fid_range[1]):
    raise ValueError(
        f"Value of {key}={value} is out of allowed range {fid_range}."
    )
def __call__(self, configuration: Dict, **kwargs) -> float:
"""Provides interface to use, e.g., SciPy optimizers"""
return self.f(configuration, **kwargs)["function_value"]
@abc.abstractmethod
def get_fidelity_space(self) -> FidelitySpace:
"""Defines the available fidelity parameters as a "fidelity space" for each benchmark.
Returns
-------
FidelitySpace
    A valid fidelity space for the benchmark's fidelity parameters
"""
raise NotImplementedError()
@abc.abstractmethod
def get_objectives(self) -> dict:
"""Defines the available fidelity parameters as a "fidelity space" for each benchmark.
Parameters
----------
seed: int, None
Seed for the fidelity space.
Returns
-------
ConfigSpace.ConfigurationSpace
A valid configuration space for the benchmark's fidelity parameters
"""
raise NotImplementedError()
@property
@abc.abstractmethod
def problem_type(self):
raise NotImplementedError()
@property
@abc.abstractmethod
def num_objectives(self):
raise NotImplementedError()
@property
@abc.abstractmethod
def num_variables(self):
raise NotImplementedError()
================================================
FILE: transopt/benchmark/problem_base/non_tab_problem.py
================================================
""" Base-class of configuration optimization benchmarks """
import json
import logging
import os
from pathlib import Path
from typing import Dict, List, Union
import numpy as np
from transopt.benchmark.problem_base.base import ProblemBase
logger = logging.getLogger("NonTabularProblem")
import abc
class NonTabularProblem(ProblemBase):
def __init__(
self,
task_name: str,
budget_type,
budget: int,
workload,
seed: Union[int, np.random.RandomState, None] = None,
**kwargs,
):
self.task_name = task_name
self.budget = budget
self.workload = workload
self.lock_flag = False
self.budget_type = budget_type
super(NonTabularProblem, self).__init__(seed, **kwargs)
def get_budget_type(self) -> str:
    """Returns the budget type of the benchmark.
    Returns
    -------
    str
        the budget type, e.g. 'Num_FEs'
    """
    return self.budget_type
def get_budget(self) -> int:
    """Returns the evaluation budget of the benchmark.
    Returns
    -------
    int
        the number of allowed function evaluations
    """
    return self.budget
def get_name(self) -> str:
    """Returns the task name of the benchmark.
    Returns
    -------
    str
        a human-readable task name
    """
    return self.task_name
def get_type(self) -> str:
    """Returns the problem type of the benchmark.
    Returns
    -------
    str
        the problem type
    """
    return self.problem_type
def get_input_dim(self) -> int:
    """Returns the input dimension of the benchmark.
    Returns
    -------
    int
        the number of input variables
    """
    return self.num_variables
def get_objective_num(self) -> int:
    return self.num_objectives
def lock(self):
self.lock_flag = True
def unlock(self):
self.lock_flag = False
def get_lock_state(self) -> bool:
return self.lock_flag
@property
@abc.abstractmethod
def workloads(self):
raise NotImplementedError()
@property
@abc.abstractmethod
def fidelity(self):
raise NotImplementedError()
================================================
FILE: transopt/benchmark/problem_base/tab_problem.py
================================================
import logging
import os
from pathlib import Path
from typing import Dict, List, Union
from urllib.parse import urlparse
import numpy as np
import pandas as pds
from transopt.benchmark.problem_base.base import ProblemBase
from transopt.utils.encoding import multitarget_encoding, target_encoding
from transopt.utils.Read import read_file
logger = logging.getLogger("TabularProblem")
class TabularProblem(ProblemBase):
def __init__(
self,
task_name: str,
task_type: str,
budget: int,
workload,
path: str = None,
seed: Union[int, np.random.RandomState, None] = None,
space_info: Dict = None,
**kwargs,
):
super(TabularProblem, self).__init__(task_name= task_name, task_type=task_type, budget=budget,workload=workload, seed=seed, **kwargs)
self.path = path
parsed = urlparse(path)
if parsed.scheme and parsed.netloc:
    # Returning a value from __init__ is invalid; URL sources are not supported yet
    raise NotImplementedError("Loading tabular data from a URL is not supported yet.")
# If the string is a valid file path
elif os.path.exists(path) or os.path.isabs(path):
dir_path = Path(path)
workload_path = dir_path / workload
data = read_file(workload_path)
unnamed_columns = [col for col in data.columns if "Unnamed" in col]
# delete the unnamed column
data.drop(unnamed_columns, axis=1, inplace=True)
para_names = [value for value in data.columns]
if space_info is None or not isinstance(space_info, dict):
self.space_info = {}
else:
self.space_info = space_info
if 'input_dim' not in self.space_info and 'num_objective' not in self.space_info:
self.space_info['input_dim'] = len(para_names) - 1
self.space_info['num_objective'] = len(para_names) - self.space_info['input_dim']
elif 'input_dim' in self.space_info and 'num_objective' in self.space_info:
pass
else:
if 'num_objective' in self.space_info:
self.space_info['input_dim'] = len(para_names) - self.space_info['num_objective']
if 'input_dim' in self.space_info:
self.space_info['num_objective'] = len(para_names) - self.space_info['input_dim']
self.input_dim = self.space_info['input_dim']
self.num_objective = self.space_info['num_objective']
self.encodings = {}
for i in range(self.num_objective):
data[f"function_value_{i+1}"] = data[para_names[self.input_dim+i]]
if 'variables' not in self.space_info:
self.space_info['variables'] = {}
for i in range(self.space_info['input_dim']):
var_name = para_names[i]
max_value = data[var_name].max()
min_value = data[var_name].min()
contains_decimal = False
contains_str = False
if data[var_name][1:].nunique() > 10:
for item in data[var_name][1:]:
    if isinstance(item, str):
        contains_str = True
        break
    if int(item) - item != 0:
        contains_decimal = True
        break  # found a decimal value, no need to keep checking
if contains_decimal:
var_type = 'continuous'
self.space_info['variables'][var_name] = {'bounds': [min_value, max_value],
'type': var_type}
elif contains_str:
var_type = 'categorical'
data[var_name] = data[var_name].astype(str)
self.space_info['variables'][var_name] = {'bounds': [0, len(data[var_name][1:].unique()) - 1] ,
'type': var_type}
if self.num_objective > 1:
self.cat_mapping = multitarget_encoding(data, var_name, [f'function_value_{i+1}' for i in range(self.num_objective)])
else:
self.cat_mapping = target_encoding(data, var_name, 'function_value_1')
else:
var_type = 'integer'
data[var_name] = data[var_name].astype(int)
self.space_info['variables'][var_name] = {'bounds': [min_value, max_value],
'type': var_type}
else:
var_type = 'categorical'
data[var_name] = data[var_name].astype(str)
if self.num_objective > 1:
self.cat_mapping = multitarget_encoding(data, var_name, [f'function_value_{i + 1}' for i in
range(self.num_objective)])
else:
self.cat_mapping = target_encoding(data, var_name, 'function_value_1')
max_key = max(self.cat_mapping.keys())
# Find the smallest key
min_key = min(self.cat_mapping.keys())
self.space_info['variables'][var_name] = {'bounds': [min_key, max_key],
'type': var_type}
data['config'] = data.apply(lambda row: row[:self.input_dim].tolist(), axis=1)
data["config_s"] = data["config"].astype(str)
else:
raise ValueError("Unknown path type, only accept url or file path")
self.var_range = self.get_configuration_bound()
self.var_type = self.get_configuration_type()
self.unqueried_data = data
self.queried_data = pds.DataFrame(columns=data.columns)
def f(
self,
configuration: Union[Dict, None],
fidelity: Union[Dict, None] = None,
**kwargs,
) -> Dict:
results = self.objective_function(
configuration=configuration, fidelity=fidelity, seed=self.seed
)
return results
def objective_function(
self,
configuration: Union[ Dict],
fidelity: Union[Dict, None] = None,
seed: Union[np.random.RandomState, int, None] = None,
**kwargs,
) -> Dict:
c = {}
for k in configuration.keys():
if self.space_info['variables'][k]['type'] == 'categorical':
c[k] = self.cat_mapping[configuration[k]]
else:
c[k] = configuration[k]
X = str([configuration[k] for k in configuration.keys()])
data = self.unqueried_data[self.unqueried_data['config_s'] == X]
if not data.empty:
self.unqueried_data.drop(data.index, inplace=True)
self.queried_data = pds.concat([self.queried_data, data], ignore_index=True)
else:
raise ValueError(f"Configuration {X} not exist in oracle")
res = {}
for i in range(self.num_objective):
res[f"function_value_{i+1}"] = float(data[f"function_value_{i+1}"].iloc[0])
res["info"] = {"fidelity": fidelity}
return res
def sample_dataframe(key, df, p_remove=0.):
"""Randomly sample dataframe by the removal percentage."""
if p_remove < 0 or p_remove >= 1:
raise ValueError(
f'p_remove={p_remove} but p_remove must be <1 and >= 0.')
if p_remove > 0:
n_remain = (1 - p_remove) * len(df)
n_remain = int(np.ceil(n_remain))
df = df.sample(n=n_remain, replace=False, random_state=key[0])
return df
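`sample_dataframe` keeps `ceil((1 - p_remove) * len(df))` rows. That row-count arithmetic can be checked without pandas; the `n_remaining` helper below is an illustrative pure-numpy sketch of the same bookkeeping:

```python
import numpy as np

def n_remaining(n_rows, p_remove):
    # Same bookkeeping as sample_dataframe: remove a fraction p_remove
    # of the rows, rounding the number of survivors up.
    if p_remove < 0 or p_remove >= 1:
        raise ValueError(f'p_remove={p_remove} but p_remove must be <1 and >= 0.')
    if p_remove == 0:
        return n_rows
    return int(np.ceil((1 - p_remove) * n_rows))

print(n_remaining(10, 0.25))  # ceil(7.5) -> 8
print(n_remaining(10, 0.0))   # nothing removed -> 10
```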
def get_configuration_bound(self):
    # Restored: this method is called in __init__ above; bounds are taken
    # from the space_info built while parsing the tabular data.
    configuration_bound = {}
    for k, v in self.space_info['variables'].items():
        configuration_bound[k] = v['bounds']
    return configuration_bound
def get_configuration_type(self):
    configuration_type = {}
    for k, v in self.space_info['variables'].items():
        configuration_type[k] = v['type']
    return configuration_type
def get_configuration_space(
self, seed: Union[int, None] = None
) :
"""
Creates a ConfigSpace.ConfigurationSpace containing all parameters for
the tabular benchmark
Parameters
----------
seed : int, None
Fixing the seed for the ConfigSpace.ConfigurationSpace
Returns
-------
ConfigSpace.ConfigurationSpace
"""
seed = seed if seed is not None else np.random.randint(1, 100000)
# cs = CS.ConfigurationSpace(seed=seed)
# variables = []
# for k,v in self.space_info['variables'].items():
# lower = v['bounds'][0]
# upper = v['bounds'][1]
# if 'continuous' == v['type']:
# variables.append(CS.UniformFloatHyperparameter(k, lower=lower, upper=upper))
# elif 'integer' == v['type']:
# variables.append(CS.UniformIntegerHyperparameter(k, lower=lower, upper=upper))
# elif 'categorical' == v['type']:
# variables.append(CS.UniformIntegerHyperparameter(k, lower=lower, upper=upper))
# else:
# raise ValueError('Unknown variable type')
# cs.add_hyperparameters(variables)
# return cs
def get_fidelity_space(
self, seed: Union[int, None] = None
):
"""
Creates a ConfigSpace.ConfigurationSpace containing all fidelity parameters for
the tabular benchmark
Parameters
----------
seed : int, None
Fixing the seed for the ConfigSpace.ConfigurationSpace
Returns
-------
ConfigSpace.ConfigurationSpace
"""
seed = seed if seed is not None else np.random.randint(1, 100000)
# fidel_space = CS.ConfigurationSpace(seed=seed)
# return fidel_space
def get_meta_information(self) -> Dict:
return {}
def get_budget(self) -> int:
"""Provides the function evaluations number about the benchmark.
Returns
-------
int
some human-readable information
"""
return self.budget
def get_name(self) -> str:
"""Provides the task name about the benchmark.
Returns
-------
str
some human-readable information
"""
return self.task_name
def get_type(self) -> str:
"""Provides the task type about the benchmark.
Returns
-------
str
some human-readable information
"""
return self.task_type
def get_input_dim(self) -> int:
"""Provides the input dimension about the benchmark.
Returns
-------
int
some human-readable information
"""
return self.input_dim
def get_objective_num(self) -> int:
return self.num_objective
def lock(self):
self.lock_flag = True
def unlock(self):
self.lock_flag = False
def get_lock_state(self) -> bool:
return self.lock_flag
def get_dataset_size(self):
raise NotImplementedError
def get_var_by_idx(self, idx):
raise NotImplementedError
def get_idx_by_var(self, vectors):
raise NotImplementedError
def get_unobserved_vars(self):
raise NotImplementedError
def get_unobserved_idxs(self):
raise NotImplementedError
================================================
FILE: transopt/benchmark/problem_base/transfer_problem.py
================================================
import abc
import logging
import numpy as np
from typing import Union, Dict, List
from transopt.benchmark.problem_base.non_tab_problem import NonTabularProblem
from transopt.benchmark.problem_base.tab_problem import TabularProblem
from transopt.remote import ExperimentClient
from transopt.space.search_space import SearchSpace
logger = logging.getLogger("TransferProblem")
class TransferProblem:
def __init__(self, seed: Union[int, np.random.RandomState, None] = None, **kwargs):
self.seed = seed
self.tasks = []
self.time = []
self.query_nums = []
self.__id = 0
def add_task_to_id(
self,
insert_id: int,
task: Union[
NonTabularProblem,
TabularProblem,
],
):
num_tasks = len(self.tasks)
assert insert_id < num_tasks + 1
self.tasks.insert(insert_id, task)
self.query_nums.insert(insert_id, 0)
def add_task(
self,
task: Union[
NonTabularProblem,
TabularProblem,
],
):
num_tasks = len(self.tasks)
insert_id = num_tasks
self.add_task_to_id(insert_id, task)
def del_task_by_id(self, del_id, name):
pass
def get_cur_id(self):
return self.__id
def get_tasks_num(self):
return len(self.tasks)
def get_unsolved_num(self):
return len(self.tasks) - self.__id
def get_rest_budget(self):
return self.get_cur_budget() - self.get_query_num()
def get_query_num(self):
return self.query_nums[self.__id]
def get_cur_budgettype(self):
return self.tasks[self.__id].get_budget_type()
def get_cur_budget(self):
return self.tasks[self.__id].get_budget()
def get_curname(self):
return self.tasks[self.__id].get_name()
def get_curdim(self):
return self.tasks[self.__id].get_input_dim()
def get_curobj_info(self):
return self.tasks[self.__id].get_objectives()
def get_cur_fidelity_info(self) -> Dict:
return self.tasks[self.__id].fidelity_space.get_fidelity_range()
def get_cur_searchspace_info(self) -> Dict:
return self.tasks[self.__id].configuration_space.get_design_variables()
def get_cur_searchspace(self) -> SearchSpace:
return self.tasks[self.__id].configuration_space
def get_curtask(self):
return self.tasks[self.__id]
def get_cur_seed(self):
return self.tasks[self.__id].seed
def get_cur_task_id(self):
return self.tasks[self.__id].task_id
def get_cur_workload(self):
return self.tasks[self.__id].workload
def sync_query_num(self, query_num: int):
self.query_nums[self.__id] = query_num
def roll(self):
self.__id += 1
def lock(self):
self.tasks[self.__id].lock()
def unlock(self):
self.tasks[self.__id].unlock()
def get_lockstate(self):
return self.tasks[self.__id].get_lock_state()
def get_task_type(self):
if isinstance(self.tasks[self.__id], TabularProblem):
return "tabular"
elif isinstance(self.tasks[self.__id], NonTabularProblem):
return "non-tabular"
else:
logger.error("Unknown task type.")
raise NameError
###Methods only for tabular data###
def get_dataset_size(self):
assert isinstance(self.tasks[self.__id], TabularProblem)
return self.tasks[self.__id].get_dataset_size()
def get_var_by_idx(self, idx):
assert isinstance(self.tasks[self.__id], TabularProblem)
return self.tasks[self.__id].get_var_by_idx(idx)
def get_idx_by_var(self, vectors):
assert isinstance(self.tasks[self.__id], TabularProblem)
return self.tasks[self.__id].get_idx_by_var(vectors)
def get_unobserved_vars(self):
assert isinstance(self.tasks[self.__id], TabularProblem)
return self.tasks[self.__id].get_unobserved_vars()
def get_unobserved_idxs(self):
assert isinstance(self.tasks[self.__id], TabularProblem)
return self.tasks[self.__id].get_unobserved_idxs()
def add_query_num(self):
if self.get_lockstate() == False:
self.query_nums[self.__id] += 1
def f(
self,
configuration: Union[
Dict,
List[Dict],
],
fidelity: Union[
Dict,
None,
List[Dict],
] = None,
**kwargs,
):
if isinstance(configuration, list):
if (
    self.get_query_num() + len(configuration) > self.get_cur_budget()
    and self.get_lockstate() == False
):
    logger.error(
        "The current function evaluation has exceeded the user-set budget."
    )
    return None
if isinstance(fidelity, list):
assert len(fidelity) == len(configuration)
elif fidelity is None:
fidelity = [None] * len(configuration)
else:
pass
results = []
for c_id, config in enumerate(configuration):
result = self.tasks[self.__id].f(config, fidelity[c_id])
self.add_query_num()
results.append(result)
return results
else:
if (
self.get_query_num() >= self.get_cur_budget()
and self.get_lockstate() == False
):
logger.error(
    "The current function evaluation has exceeded the user-set budget."
)
raise RuntimeError("The current function evaluation has exceeded the user-set budget.")
result = self.tasks[self.__id].f(configuration, fidelity)
self.add_query_num()
return result
# raise TypeError(f"Unrecognized task type.")
class RemoteTransferOptBenchmark(TransferProblem):
def __init__(
self, server_url, seed: Union[int, np.random.RandomState, None] = None, **kwargs
):
super().__init__(seed=seed, **kwargs)
self.client = ExperimentClient(server_url)
self.task_params_list = []
def add_task_to_id(
self,
insert_id: int,
task: NonTabularProblem | TabularProblem,
task_params,
):
assert insert_id < len(self.tasks) + 1
self.task_params_list.insert(insert_id, task_params)
self.tasks.insert(insert_id, task)
self.query_nums.insert(insert_id, 0)
def f(
self,
configuration: Union[
Dict,
List[Union[Dict]],
],
fidelity: Union[
Dict,
None,
List[Union[Dict]],
] = None,
idx: Union[int, None, List[int]] = None,
**kwargs,
):
space = self.get_cur_searchspace()
bench_name = self.get_curname().split("_")[0]
bench_params = self.task_params_list[self.get_cur_id()]
if not space or not bench_name or not bench_params:
raise ValueError("Missing or incorrect data for benchmark.")
# Package data
data = self._package_data(
space, bench_name, bench_params, configuration, fidelity, idx, **kwargs
)
result = self._execute_experiment(data)
return result
def _package_data(
self, space, bench_name, bench_params, configuration, fidelity, idx, **kwargs
):
return {
"benchmark": bench_name,
"id": space["task_id"],
"budget": space["budget"],
"seed": space["seed"],
"bench_params": bench_params,
"fitness_params": {
"configuration": configuration,
"fidelity": fidelity,
"idx": idx,
**kwargs,
},
}
def _execute_experiment(self, data):
# Send data to server and get the result
task_id = self.client.start_experiment(data)
# Wait for the task to complete and get the result
return self.client.wait_for_result(task_id)
================================================
FILE: transopt/benchmark/synthetic/MovingPeakBenchmark.py
================================================
import logging
import numpy as np
import ConfigSpace as CS
import matplotlib.pyplot as plt
from sklearn.preprocessing import normalize
from typing import Union, Tuple, Dict, List
from transopt.benchmark.problem_base import NonTabularProblem
from transopt.agent.registry import benchmark_register
logger = logging.getLogger("MovingPeakBenchmark")
class MovingPeakGenerator:
def __init__(
self,
n_var,
shift_length=3.0,
height_severity=7.0,
width_severity=1.0,
lam=0.5,
n_peak=4,
n_step=11,
seed=None,
):
if seed is not None:
np.random.seed(seed)
self.n_var = n_var
self.shift_length = shift_length
self.height_severity = height_severity
self.width_severity = width_severity
# lambda determines how strongly consecutive moves are correlated:
# for lambda = 1.0 every move keeps the previous direction, while for lambda = 0.0 each move has a random direction
self.lam = lam
# number of peaks in the landscape
self.n_peak = n_peak
self.var_bound = np.array([[0, 100]] * n_var)
self.height_bound = np.array([[30, 70]] * n_peak)
self.width_bound = np.array([[1.0, 12.0]] * n_peak)
self.n_step = n_step
self.t = 0
self.bounds = np.array(
[[-1.0] * self.n_var, [1.0] * self.n_var], dtype=np.float64
)
current_peak = np.random.random(size=(n_peak, n_var)) * np.tile(
self.var_bound[:, 1] - self.var_bound[:, 0], (n_peak, 1)
) + np.tile(self.var_bound[:, 0], (n_peak, 1))
current_width = (
np.random.random(size=(n_peak,))
* (self.width_bound[:, 1] - self.width_bound[:, 0])
+ self.width_bound[:, 0]
)
current_height = (
np.random.random(size=(n_peak,))
* (self.height_bound[:, 1] - self.height_bound[:, 0])
+ self.height_bound[:, 0]
)
previous_shift = normalize(
np.random.random(size=(n_peak, n_var)), axis=1, norm="l2"
)
self.peaks = []
self.widths = []
self.heights = []
self.peaks.append(current_peak)
self.widths.append(current_width)
self.heights.append(current_height)
for t in range(1, n_step):
peak_shift = self.cal_peak_shift(previous_shift)
width_shift = self.cal_width_shift()
height_shift = self.cal_height_shift()
current_peak = current_peak + peak_shift
current_height = current_height + height_shift.squeeze()
current_width = current_width + width_shift.squeeze()
for i in range(self.n_peak):
self._fix_bound(current_peak[i, :], self.var_bound)
self._fix_bound(current_width, self.width_bound)
self._fix_bound(current_height, self.height_bound)
previous_shift = peak_shift
self.peaks.append(current_peak)
self.widths.append(current_width)
self.heights.append(current_height)
def get_MPB(self):
return self.peaks, self.widths, self.heights
def cal_width_shift(self):
width_change = np.random.random(size=(self.n_peak, 1))
return self.width_severity * width_change
def cal_height_shift(self):
height_change = np.random.random(size=(self.n_peak, 1))
return self.height_severity * height_change
def cal_peak_shift(self, previous_shift):
peak_change = np.random.random(size=(self.n_peak, self.n_var))
return (1 - self.lam) * self.shift_length * normalize(
peak_change - 0.5, axis=1, norm="l2"
) + self.lam * previous_shift
def change(self):
if self.t < self.n_step - 1:
self.t += 1
def current_optimal(self, peak_shape=None):
current_peak = self.peaks[self.t]
current_height = self.heights[self.t]
optimal_x = np.atleast_2d(current_peak[np.argmax(current_height)])
optimal_y = self.f(optimal_x, peak_shape)
return optimal_x, optimal_y
def transfer(self, X):
return (X + 1) * (self.var_bound[:, 1] - self.var_bound[:, 0]) / 2 + (
self.var_bound[:, 0]
)
def normalize(self, X):
return (
2
* (X - (self.var_bound[:, 0]))
/ (self.var_bound[:, 1] - self.var_bound[:, 0])
- 1
)
@property
def optimizers(self):
current_peak = self.peaks[self.t]
current_height = self.heights[self.t]
optimal_x = np.atleast_2d(current_peak[np.argmax(current_height)])
optimal_x = self.normalize(optimal_x)
return optimal_x
@staticmethod
def _fix_bound(data, bound):
for i in range(data.shape[0]):
if data[i] < bound[i, 0]:
data[i] = 2 * bound[i, 0] - data[i]
elif data[i] > bound[i, 1]:
data[i] = 2 * bound[i, 1] - data[i]
while data[i] < bound[i, 0] or data[i] > bound[i, 1]:
data[i] = data[i] * 0.5 + bound[i, 0] * 0.25 + bound[i, 1] * 0.25
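The generator above combines a correlated random walk (`cal_peak_shift`) with a reflect-and-contract bound repair (`_fix_bound`). A minimal standalone sketch of those two pieces, with `normalize_rows` standing in for sklearn's `normalize` (function names here are illustrative, not part of the library API):

```python
import numpy as np

def normalize_rows(v):
    # L2-normalize each row (stand-in for sklearn.preprocessing.normalize).
    norms = np.linalg.norm(v, axis=1, keepdims=True)
    return v / np.where(norms == 0, 1.0, norms)

def peak_shift(previous_shift, shift_length, lam, rng):
    # Correlated random walk: lam blends the previous direction with a fresh
    # random one; lam=1 repeats the last move, lam=0 is fully random.
    n_peak, n_var = previous_shift.shape
    rnd = normalize_rows(rng.random((n_peak, n_var)) - 0.5)
    return (1 - lam) * shift_length * rnd + lam * previous_shift

def fix_bound(data, bound):
    # Reflect out-of-bound coordinates at the violated boundary, then
    # contract toward the interval midpoint until the value is feasible.
    data = data.copy()
    for i in range(data.shape[0]):
        if data[i] < bound[i, 0]:
            data[i] = 2 * bound[i, 0] - data[i]
        elif data[i] > bound[i, 1]:
            data[i] = 2 * bound[i, 1] - data[i]
        while data[i] < bound[i, 0] or data[i] > bound[i, 1]:
            data[i] = data[i] * 0.5 + bound[i, 0] * 0.25 + bound[i, 1] * 0.25
    return data
```

With `lam = 1.0` the shift exactly repeats the previous move, which is a quick way to sanity-check the blending formula.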
@benchmark_register("MPB")
class MovingPeakBenchmark(NonTabularProblem):
def __init__(
self,
task_name,
budget,
peak,
height,
width,
seed,
input_dim,
task_type="non-tabular",
):
self.dimension = input_dim
self.peak = peak
self.height = height
self.width = width
self.n_peak = len(peak)
super(MovingPeakBenchmark, self).__init__(
task_name=task_name, seed=seed, task_type=task_type, budget=budget
)
def peak_function_cone(self, x):
distance = np.linalg.norm(np.tile(x, (self.n_peak, 1)) - self.peak, axis=1)
return np.max(self.height - self.width * distance)
def peak_function_sharp(self, x):
distance = np.linalg.norm(np.tile(x, (self.n_peak, 1)) - self.peak, axis=1)
return np.max(self.height / (1 + self.width * distance * distance))
def peak_function_hilly(self, x):
distance = np.linalg.norm(np.tile(x, (self.n_peak, 1)) - self.peak, axis=1)
return np.max(
self.height
- self.width * distance * distance
- 0.01 * np.sin(20.0 * distance * distance)
)
def objective_function(
self,
configuration: Union[CS.Configuration, Dict],
fidelity: Union[Dict, CS.Configuration, None] = None,
seed: Union[np.random.RandomState, int, None] = None,
**kwargs,
) -> Dict:
if "peak_shape" not in kwargs:
peak_shape = "cone"
else:
peak_shape = kwargs["peak_shape"]
X = np.array([[configuration[k] for k in configuration.keys()]])
if peak_shape == "cone":
peak_function = self.peak_function_cone
elif peak_shape == "sharp":
peak_function = self.peak_function_sharp
elif peak_shape == "hilly":
peak_function = self.peak_function_hilly
else:
# Unknown shape: fall back to the default cone landscape.
peak_function = self.peak_function_cone
y = peak_function(X)
return {"function_value": float(y), "info": {"fidelity": fidelity}}
def get_configuration_space(
self, seed: Union[int, None] = None
) -> CS.ConfigurationSpace:
"""
Creates a ConfigSpace.ConfigurationSpace containing all parameters for
the Moving Peak benchmark.
Parameters
----------
seed : int, None
Fixing the seed for the ConfigSpace.ConfigurationSpace
Returns
-------
ConfigSpace.ConfigurationSpace
"""
seed = seed if seed is not None else np.random.randint(1, 100000)
cs = CS.ConfigurationSpace(seed=seed)
cs.add_hyperparameters(
[
CS.UniformFloatHyperparameter(f"x{i}", lower=0.0, upper=100.0)
for i in range(self.dimension)
]
)
return cs
def get_fidelity_space(
self, seed: Union[int, None] = None
) -> CS.ConfigurationSpace:
"""
Creates a ConfigSpace.ConfigurationSpace containing all fidelity parameters for
the Moving Peak benchmark.
Parameters
----------
seed : int, None
Fixing the seed for the ConfigSpace.ConfigurationSpace
Returns
-------
ConfigSpace.ConfigurationSpace
"""
seed = seed if seed is not None else np.random.randint(1, 100000)
fidel_space = CS.ConfigurationSpace(seed=seed)
return fidel_space
def get_meta_information(self) -> Dict:
return {}
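The peak landscapes evaluated by `peak_function_cone` and `peak_function_sharp` above can be sketched standalone; this minimal version (function names are illustrative) takes the maximum over all peaks of each shape:

```python
import numpy as np

def peak_cone(x, peaks, heights, widths):
    # max_i ( height_i - width_i * ||x - peak_i|| )
    d = np.linalg.norm(peaks - x, axis=1)
    return float(np.max(heights - widths * d))

def peak_sharp(x, peaks, heights, widths):
    # max_i ( height_i / (1 + width_i * ||x - peak_i||^2) )
    d = np.linalg.norm(peaks - x, axis=1)
    return float(np.max(heights / (1.0 + widths * d * d)))
```

At a peak centre whose height dominates, both shapes return exactly that peak's height, which makes the functions easy to spot-check.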
================================================
FILE: transopt/benchmark/synthetic/MultiObjBenchmark.py
================================================
import os
import math
import logging
import numpy as np
import matplotlib.pyplot as plt
import ConfigSpace as CS
from typing import Union, Dict
import random
from transopt.agent.registry import benchmark_register
from transopt.benchmark.problem_base import NonTabularProblem
logger = logging.getLogger("MultiObjBenchmark")
@benchmark_register("AckleySphere")
class AckleySphereOptBenchmark(NonTabularProblem):
def __init__(
self, task_name, budget, seed, workload = None, task_type="non-tabular", **kwargs
):
assert "params" in kwargs
parameters = kwargs["params"]
self.input_dim = parameters["input_dim"]
self.workload = workload
rnd_instance = random.Random(self.workload)
if "shift" in parameters:
self.shift = parameters["shift"]
else:
shift = np.array([rnd_instance.random() for _ in range(self.input_dim)])[:, np.newaxis].T
self.shift = (shift * 2 - 1) * 0.02
if "stretch" in parameters:
self.stretch = parameters["stretch"]
else:
self.stretch = np.array([1] * self.input_dim, dtype=np.float64)
self.optimizers = tuple(self.shift)
self.dtype = np.float64
super(AckleySphereOptBenchmark, self).__init__(
task_name=task_name,
seed=seed,
task_type=task_type,
budget=budget,
workload=workload,
)
def objective_function(
self,
configuration: Union[CS.Configuration, Dict],
fidelity: Union[Dict, CS.Configuration, None] = None,
seed: Union[np.random.RandomState, int, None] = None,
**kwargs,
) -> Dict:
X = np.array([[configuration[k] for k in configuration.keys()]])
X = self.stretch * (X - self.shift)
a = 20
b = 0.2
c = 2 * np.pi
f_1 = (
-a * np.exp(-b * np.sqrt(np.sum(X**2) / 2))
- np.exp(np.sum(np.cos(c * X)) / 2)
+ a
+ np.e
)
f_2 = np.sum(X**2)
return {
"function_value_1": float(f_1),
"function_value_2": float(f_2),
"info": {"fidelity": fidelity},
}
def get_configuration_space(
self, seed: Union[int, None] = None
) -> CS.ConfigurationSpace:
"""
Creates a ConfigSpace.ConfigurationSpace containing all parameters for
the AckleySphere benchmark.
Parameters
----------
seed : int, None
Fixing the seed for the ConfigSpace.ConfigurationSpace
Returns
-------
ConfigSpace.ConfigurationSpace
"""
seed = seed if seed is not None else np.random.randint(1, 100000)
cs = CS.ConfigurationSpace(seed=seed)
cs.add_hyperparameters(
[
CS.UniformFloatHyperparameter(f"x{i}", lower=-5.12, upper=5.12)
for i in range(self.input_dim)
]
)
return cs
def get_fidelity_space(
self, seed: Union[int, None] = None
) -> CS.ConfigurationSpace:
"""
Creates a ConfigSpace.ConfigurationSpace containing all fidelity parameters for
the AckleySphere benchmark.
Parameters
----------
seed : int, None
Fixing the seed for the ConfigSpace.ConfigurationSpace
Returns
-------
ConfigSpace.ConfigurationSpace
"""
seed = seed if seed is not None else np.random.randint(1, 100000)
fidel_space = CS.ConfigurationSpace(seed=seed)
return fidel_space
def get_meta_information(self) -> Dict:
return {"number_objective": 2}
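The two objectives above can be reproduced standalone. Note that the class divides by a hard-coded 2 inside the Ackley terms (i.e. it implicitly assumes `input_dim == 2`; the canonical Ackley divides by the dimension `d`). This sketch keeps that behaviour, with an illustrative function name:

```python
import numpy as np

def ackley_sphere(x, a=20.0, b=0.2, c=2.0 * np.pi):
    # f1: Ackley with the hard-coded division by 2 used above.
    f1 = (-a * np.exp(-b * np.sqrt(np.sum(x ** 2) / 2.0))
          - np.exp(np.sum(np.cos(c * x)) / 2.0) + a + np.e)
    # f2: Sphere.
    f2 = np.sum(x ** 2)
    return float(f1), float(f2)
```

Both objectives vanish at the origin (for the unshifted, unstretched case), so the shared optimum is easy to verify.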
================================================
FILE: transopt/benchmark/synthetic/__init__.py
================================================
from transopt.benchmark.synthetic.synthetic_problems import (
# SphereOptBenchmark,
# RastriginOptBenchmark,
# SchwefelOptBenchmark,
# LevyROptBenchmark,
# GriewankOptBenchmark,
# RosenbrockOptBenchmark,
# DropwaveROptBenchmark,
# LangermannOptBenchmark,
# RotatedHyperEllipsoidOptBenchmark,
# SumOfDifferentPowersOptBenchmark,
# StyblinskiTangOptBenchmark,
# PowellOptBenchmark,
# DixonPriceOptBenchmark,
# cpOptBenchmark,
# mpbOptBenchmark,
Ackley,
# EllipsoidOptBenchmark,
# DiscusOptBenchmark,
# BentCigarOptBenchmark,
# SharpRidgeOptBenchmark,
# GriewankRosenbrockOptBenchmark,
# KatsuuraOptBenchmark,
)
================================================
FILE: transopt/benchmark/synthetic/synthetic_problems.py
================================================
import os
import math
import logging
import numpy as np
import matplotlib.pyplot as plt
from typing import Union, Dict
from transopt.space.variable import *
from transopt.agent.registry import problem_registry
from transopt.benchmark.problem_base.non_tab_problem import NonTabularProblem
from transopt.space.search_space import SearchSpace
from transopt.space.fidelity_space import FidelitySpace
from matplotlib import gridspec
logger = logging.getLogger("SyntheticBenchmark")
class SyntheticProblemBase(NonTabularProblem):
problem_type = "synthetic"
num_variables = []
num_objectives = 1
workloads = []
fidelity = None
def __init__(
self, task_name, budget_type, budget, seed, workload, **kwargs
):
super(SyntheticProblemBase, self).__init__(
task_name=task_name,
seed=seed,
workload=workload,
budget_type=budget_type,
budget=budget,
)
def get_fidelity_space(self) -> FidelitySpace:
fs = FidelitySpace([])
return fs
def get_objectives(self) -> Dict:
return {'f1':'minimize'}
def get_problem_type(self):
return "synthetic"
@problem_registry.register("Sphere")
class SphereOptBenchmark(SyntheticProblemBase):
def __init__(
self, task_name, budget_type, budget, seed, workload, **kwargs
):
assert "params" in kwargs
parameters = kwargs["params"]
self.input_dim = parameters["input_dim"]
if "shift" in parameters:
self.shift = parameters["shift"]
else:
shift = np.random.random(size=(self.input_dim, 1)).T
self.shift = (shift * 2 - 1) * 0.02
if "stretch" in parameters:
self.stretch = parameters["stretch"]
else:
self.stretch = np.array([1] * self.input_dim, dtype=np.float64)
self.optimizers = tuple(self.shift)
self.dtype = np.float64
super(SphereOptBenchmark, self).__init__(
task_name=task_name,
seed=seed,
workload=workload,
budget_type=budget_type,
budget=budget,
)
def objective_function(
self,
configuration: Dict,
fidelity: Dict = None,
seed: Union[np.random.RandomState, int, None] = None,
**kwargs,
) -> Dict:
X = np.array([[configuration[k] for k in configuration.keys()]])
X = self.stretch * (X - self.shift)
n = X.shape[0]
d = X.shape[1]
y = np.sum((X) ** 2, axis=1)
# y += self.noise(n)
results = {list(self.objective_info.keys())[0]: float(y)}
for fd_name in self.fidelity_space.fidelity_names:
results[fd_name] = fidelity[fd_name]
return results
def get_configuration_space(self) -> SearchSpace:
variables = [Continuous(f'x{i}', (-5.12, 5.12)) for i in range(self.input_dim)]
ss = SearchSpace(variables)
return ss
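Every benchmark in this file applies the same affine preprocessing before the base function: `z = stretch * (x - shift)`. A minimal sketch for the Sphere case (the function name is illustrative):

```python
import numpy as np

def shifted_sphere(x, shift, stretch):
    # z = stretch * (x - shift); the optimum therefore moves to x = shift,
    # and stretch rescales the landscape around it.
    z = stretch * (np.asarray(x, dtype=float) - shift)
    return float(np.sum(z ** 2))
```

Evaluating at `x = shift` returns 0, and doubling `stretch` quadruples the value at any other point.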
@problem_registry.register("Rastrigin")
class RastriginOptBenchmark(SyntheticProblemBase):
def __init__(
self, task_name, budget_type, budget, seed, workload, **kwargs
):
assert "params" in kwargs
parameters = kwargs["params"]
self.input_dim = parameters["input_dim"]
if "shift" in parameters:
self.shift = parameters["shift"]
else:
shift = np.random.random(size=(self.input_dim, 1)).T
self.shift = (shift * 2 - 1) * 0.02
if "stretch" in parameters:
self.stretch = parameters["stretch"]
else:
self.stretch = np.array([1] * self.input_dim, dtype=np.float64)
self.optimizers = tuple(self.shift + 2.0)
self.dtype = np.float64
super(RastriginOptBenchmark, self).__init__(
task_name=task_name,
seed=seed,
workload=workload,
budget_type=budget_type,
budget=budget,
)
def objective_function(
self,
configuration: Dict,
fidelity: Dict = None,
seed: Union[np.random.RandomState, int, None] = None,
**kwargs,
) -> Dict:
X = np.array([[configuration[k] for k in configuration.keys()]])
X = self.stretch * (X - self.shift - 0.4)
n = X.shape[0]
d = X.shape[1]
pi = np.array([math.pi], dtype=self.dtype)
y = 10.0 * self.input_dim + np.sum((X) ** 2 - 10.0 * np.cos(pi * (X)), axis=1)
# y += self.noise(n)
results = {list(self.objective_info.keys())[0]: float(y)}
for fd_name in self.fidelity_space.fidelity_names:
results[fd_name] = fidelity[fd_name]
return results
def get_configuration_space(self) -> SearchSpace:
variables = [Continuous(f'x{i}', (-5.12, 5.12)) for i in range(self.input_dim)]
ss = SearchSpace(variables)
return ss
@problem_registry.register("Schwefel")
class SchwefelOptBenchmark(SyntheticProblemBase):
def __init__(
self, task_name, budget_type, budget, seed, workload, **kwargs
):
assert "params" in kwargs
parameters = kwargs["params"]
self.input_dim = parameters["input_dim"]
if "shift" in parameters:
self.shift = parameters["shift"]
else:
shift = np.random.random(size=(self.input_dim, 1)).T
self.shift = (shift * 2 - 1) * 0.02
if "stretch" in parameters:
self.stretch = parameters["stretch"]
else:
self.stretch = np.array([1] * self.input_dim, dtype=np.float64)
self.optimizers = tuple(420.9687 - self.shift)
self.dtype = np.float64
super(SchwefelOptBenchmark, self).__init__(
task_name=task_name,
seed=seed,
workload=workload,
budget_type=budget_type,
budget=budget,
)
def objective_function(
self,
configuration: Dict,
fidelity: Dict = None,
seed: Union[np.random.RandomState, int, None] = None,
**kwargs,
) -> Dict:
X = np.array([[configuration[k] for k in configuration.keys()]])
X = self.stretch * (X - self.shift)
n = X.shape[0]
d = X.shape[1]
y = 420 - np.sum(
np.multiply(X, np.sin(np.sqrt(abs(self.stretch * X - self.shift)))), axis=1
)
# y += self.noise(n)
results = {list(self.objective_info.keys())[0]: float(y)}
for fd_name in self.fidelity_space.fidelity_names:
results[fd_name] = fidelity[fd_name]
return results
def get_configuration_space(self) -> SearchSpace:
variables = [Continuous(f'x{i}', (-500.0, 500.0)) for i in range(self.input_dim)]
ss = SearchSpace(variables)
return ss
@problem_registry.register("LevyR")
class LevyROptBenchmark(SyntheticProblemBase):
def __init__(
self, task_name, budget_type, budget, seed, workload, **kwargs
):
assert "params" in kwargs
parameters = kwargs["params"]
self.input_dim = parameters["input_dim"]
if "shift" in parameters:
self.shift = parameters["shift"]
else:
shift = np.random.random(size=(self.input_dim, 1)).T
self.shift = (shift * 2 - 1) * 0.02
if "stretch" in parameters:
self.stretch = parameters["stretch"]
else:
self.stretch = np.array([1] * self.input_dim, dtype=np.float64)
self.optimizers = tuple(self.shift - 1.0)
self.dtype = np.float64
super(LevyROptBenchmark, self).__init__(
task_name=task_name,
seed=seed,
workload=workload,
budget_type=budget_type,
budget=budget,
)
def objective_function(
self,
configuration: Dict,
fidelity: Dict = None,
seed: Union[np.random.RandomState, int, None] = None,
**kwargs,
) -> Dict:
X = np.array([[configuration[k] for k in configuration.keys()]])
X = self.stretch * (X - self.shift - 0.1)
n = X.shape[0]
d = X.shape[1]
w = 1.0 + X / 4.0
pi = np.array([math.pi], dtype=self.dtype)
part1 = np.sin(pi * w[..., 0]) ** 2
part2 = np.sum(
(w[..., :-1] - 1.0) ** 2
* (1.0 + 5.0 * np.sin(math.pi * w[..., :-1] + 1.0) ** 2),
axis=1,
)
part3 = (w[..., -1] - 1.0) ** 2 * (1.0 + np.sin(2 * math.pi * w[..., -1]) ** 2)
y = part1 + part2 + part3
# y += self.noise(n)
results = {list(self.objective_info.keys())[0]: float(y)}
for fd_name in self.fidelity_space.fidelity_names:
results[fd_name] = fidelity[fd_name]
return results
def get_configuration_space(self) -> SearchSpace:
variables = [Continuous(f'x{i}', (-10.0, 10.0)) for i in range(self.input_dim)]
ss = SearchSpace(variables)
return ss
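The Levy variant above uses `w = 1 + x/4` and a factor of 5 in the middle term, whereas the canonical Levy function uses `w = 1 + (x - 1)/4` and a factor of 10. A standalone sketch of the variant as written (illustrative name), whose minimum sits at the origin:

```python
import numpy as np

def levy_variant(x):
    # Levy as implemented above: w = 1 + x/4, coefficient 5 in part2.
    w = 1.0 + np.asarray(x, dtype=float) / 4.0
    part1 = np.sin(np.pi * w[0]) ** 2
    part2 = np.sum((w[:-1] - 1.0) ** 2
                   * (1.0 + 5.0 * np.sin(np.pi * w[:-1] + 1.0) ** 2))
    part3 = (w[-1] - 1.0) ** 2 * (1.0 + np.sin(2.0 * np.pi * w[-1]) ** 2)
    return float(part1 + part2 + part3)
```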
@problem_registry.register("Griewank")
class GriewankOptBenchmark(SyntheticProblemBase):
def __init__(
self, task_name, budget_type, budget, seed, workload, **kwargs
):
assert "params" in kwargs
parameters = kwargs["params"]
self.input_dim = parameters["input_dim"]
if "shift" in parameters:
self.shift = parameters["shift"]
else:
shift = np.random.random(size=(self.input_dim, 1)).T
self.shift = (shift * 2 - 1) * 0.02
if "stretch" in parameters:
self.stretch = parameters["stretch"]
else:
self.stretch = np.array([1] * self.input_dim, dtype=np.float64)
self.optimizers = tuple(self.shift)
self.dtype = np.float64
super(GriewankOptBenchmark, self).__init__(
task_name=task_name,
seed=seed,
workload=workload,
budget_type=budget_type,
budget=budget,
)
def objective_function(
self,
configuration: Dict,
fidelity: Dict = None,
seed: Union[np.random.RandomState, int, None] = None,
**kwargs,
) -> Dict:
X = np.array([[configuration[k] for k in configuration.keys()]])
X = self.stretch * (X - self.shift)
n = X.shape[0]
d = X.shape[1]
div = np.arange(start=1, stop=d + 1, dtype=self.dtype)
part1 = np.sum(X**2 / 4000.0, axis=1)
part2 = -np.prod(np.cos(X / np.sqrt(div)), axis=1)
y = part1 + part2 + 1.0
# y += self.noise(n)
results = {list(self.objective_info.keys())[0]: float(y)}
for fd_name in self.fidelity_space.fidelity_names:
results[fd_name] = fidelity[fd_name]
return results
def get_configuration_space(self) -> SearchSpace:
variables = [Continuous(f'x{i}', (-100.0, 100.0)) for i in range(self.input_dim)]
ss = SearchSpace(variables)
return ss
@problem_registry.register("Rosenbrock")
class RosenbrockOptBenchmark(SyntheticProblemBase):
def __init__(
self, task_name, budget_type, budget, seed, workload, **kwargs
):
assert "params" in kwargs
parameters = kwargs["params"]
self.input_dim = parameters["input_dim"]
if "shift" in parameters:
self.shift = parameters["shift"]
else:
shift = np.random.random(size=(self.input_dim, 1)).T
self.shift = (shift * 2 - 1) * 0.02
if "stretch" in parameters:
self.stretch = parameters["stretch"]
else:
self.stretch = np.array([1] * self.input_dim, dtype=np.float64)
self.optimizers = tuple(self.shift)
self.dtype = np.float64
super(RosenbrockOptBenchmark, self).__init__(
task_name=task_name,
seed=seed,
workload=workload,
budget_type=budget_type,
budget=budget,
)
def objective_function(
self,
configuration: Dict,
fidelity: Dict = None,
seed: Union[np.random.RandomState, int, None] = None,
**kwargs,
) -> Dict:
X = np.array([[configuration[k] for k in configuration.keys()]])
X = self.stretch * (X - self.shift)
n = X.shape[0]
d = X.shape[1]
y = np.sum(
100.0 * (X[..., 1:] - X[..., :-1] ** 2) ** 2 + (X[..., :-1] - 1) ** 2,
axis=-1,
)
# y += self.noise(n)
results = {list(self.objective_info.keys())[0]: float(y)}
for fd_name in self.fidelity_space.fidelity_names:
results[fd_name] = fidelity[fd_name]
return results
def get_configuration_space(self) -> SearchSpace:
variables = [Continuous(f'x{i}', (-5.0, 10.0)) for i in range(self.input_dim)]
ss = SearchSpace(variables)
return ss
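The Rosenbrock sum above pairs consecutive coordinates; a standalone sketch (illustrative name) with its well-known minimum f(1, ..., 1) = 0:

```python
import numpy as np

def rosenbrock(x):
    # sum_i 100*(x_{i+1} - x_i^2)^2 + (x_i - 1)^2 over consecutive pairs.
    x = np.asarray(x, dtype=float)
    return float(np.sum(100.0 * (x[1:] - x[:-1] ** 2) ** 2
                        + (x[:-1] - 1.0) ** 2))
```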
@problem_registry.register("DropwaveR")
class DropwaveROptBenchmark(SyntheticProblemBase):
def __init__(
self, task_name, budget_type, budget, seed, workload, **kwargs
):
assert "params" in kwargs
parameters = kwargs["params"]
self.input_dim = parameters["input_dim"]
if "shift" in parameters:
self.shift = parameters["shift"]
else:
shift = np.random.random(size=(self.input_dim, 1)).T
self.shift = (shift * 2 - 1) * 0.02
if "stretch" in parameters:
self.stretch = parameters["stretch"]
else:
self.stretch = np.array([1] * self.input_dim, dtype=np.float64)
self.optimizers = tuple(self.shift + 3.3)
self.dtype = np.float64
self.a = np.array([20], dtype=self.dtype)
self.b = np.array([0.2], dtype=self.dtype)
self.c = np.array([2 * math.pi], dtype=self.dtype)
super(DropwaveROptBenchmark, self).__init__(
task_name=task_name,
seed=seed,
workload=workload,
budget_type=budget_type,
budget=budget,
)
def objective_function(
self,
configuration: Dict,
fidelity: Dict = None,
seed: Union[np.random.RandomState, int, None] = None,
**kwargs,
) -> Dict:
X = np.array([[configuration[k] for k in configuration.keys()]])
X = self.stretch * (X - self.shift - 0.33)
n = X.shape[0]
d = X.shape[1]
part1 = np.linalg.norm(X, axis=1)
y = -(3 + np.cos(part1)) / (0.1 * np.power(part1, 1.5) + 1)
# y += self.noise(n)
results = {list(self.objective_info.keys())[0]: float(y)}
for fd_name in self.fidelity_space.fidelity_names:
results[fd_name] = fidelity[fd_name]
return results
def get_configuration_space(self) -> SearchSpace:
variables = [Continuous(f'x{i}', (-10.0, 10.0)) for i in range(self.input_dim)]
ss = SearchSpace(variables)
return ss
@problem_registry.register("Langermann")
class LangermannOptBenchmark(SyntheticProblemBase):
def __init__(
self, task_name, budget_type, budget, seed, workload, **kwargs
):
assert "params" in kwargs
parameters = kwargs["params"]
self.input_dim = parameters["input_dim"]
if "shift" in parameters:
self.shift = parameters["shift"]
else:
shift = np.random.random(size=(self.input_dim, 1)).T
self.shift = (shift * 2 - 1) * 0.02
if "stretch" in parameters:
self.stretch = parameters["stretch"]
else:
self.stretch = np.array([1] * self.input_dim, dtype=np.float64)
self.optimizers = tuple(self.shift)
self.dtype = np.float64
self.c = np.array([1, 2, 5])
self.m = 3
self.A = np.random.randint(1, 10, (self.m, self.input_dim))
super(LangermannOptBenchmark, self).__init__(
task_name=task_name,
seed=seed,
workload=workload,
budget_type=budget_type,
budget=budget,
)
def objective_function(
self,
configuration: Dict,
fidelity: Dict = None,
seed: Union[np.random.RandomState, int, None] = None,
**kwargs,
) -> Dict:
X = np.array([[configuration[k] for k in configuration.keys()]])
X = self.stretch * (X - self.shift)
n = X.shape[0]
d = X.shape[1]
y = 0
for i in range(self.m):
part1 = np.exp(-np.sum(np.power(X - self.A[i], 2), axis=1) / np.pi)
part2 = np.cos(np.sum(np.power(X - self.A[i], 2), axis=1) * np.pi)
y += part1 * part2 * self.c[i]
# y += self.noise(n)
results = {list(self.objective_info.keys())[0]: float(y)}
for fd_name in self.fidelity_space.fidelity_names:
results[fd_name] = fidelity[fd_name]
return results
def get_configuration_space(self) -> SearchSpace:
variables = [Continuous(f'x{i}', (0.0, 10.0)) for i in range(self.input_dim)]
ss = SearchSpace(variables)
return ss
@problem_registry.register("RotatedHyperEllipsoid")
class RotatedHyperEllipsoidOptBenchmark(SyntheticProblemBase):
def __init__(
self, task_name, budget_type, budget, seed, workload, **kwargs
):
assert "params" in kwargs
parameters = kwargs["params"]
self.input_dim = parameters["input_dim"]
if "shift" in parameters:
self.shift = parameters["shift"]
else:
shift = np.random.random(size=(self.input_dim, 1)).T
self.shift = (shift * 2 - 1) * 0.02
if "stretch" in parameters:
self.stretch = parameters["stretch"]
else:
self.stretch = np.array([1] * self.input_dim, dtype=np.float64)
self.optimizers = tuple(self.shift - 32.75)
self.dtype = np.float64
super(RotatedHyperEllipsoidOptBenchmark, self).__init__(
task_name=task_name,
seed=seed,
workload=workload,
budget_type=budget_type,
budget=budget,
)
def objective_function(
self,
configuration: Dict,
fidelity: Dict = None,
seed: Union[np.random.RandomState, int, None] = None,
**kwargs,
) -> Dict:
X = np.array([[configuration[k] for k in configuration.keys()]])
X = self.stretch * (X - self.shift + 0.5)
n = X.shape[0]
d = X.shape[1]
div = np.arange(start=d, stop=0, step=-1, dtype=self.dtype)
y = np.sum(div * X**2, axis=1)
# y += self.noise(n)
results = {list(self.objective_info.keys())[0]: float(y)}
for fd_name in self.fidelity_space.fidelity_names:
results[fd_name] = fidelity[fd_name]
return results
def get_configuration_space(self) -> SearchSpace:
variables = [Continuous(f'x{i}', (-65.536, 65.536)) for i in range(self.input_dim)]
ss = SearchSpace(variables)
return ss
@problem_registry.register("SumOfDifferentPowers")
class SumOfDifferentPowersOptBenchmark(SyntheticProblemBase):
def __init__(
self, task_name, budget_type, budget, seed, workload, **kwargs
):
assert "params" in kwargs
parameters = kwargs["params"]
self.input_dim = parameters["input_dim"]
if "shift" in parameters:
self.shift = parameters["shift"]
else:
shift = np.random.random(size=(self.input_dim, 1)).T
self.shift = (shift * 2 - 1) * 0.02
if "stretch" in parameters:
self.stretch = parameters["stretch"]
else:
self.stretch = np.array([1] * self.input_dim, dtype=np.float64)
self.optimizers = tuple(self.shift + 0.238)
self.dtype = np.float64
super(SumOfDifferentPowersOptBenchmark, self).__init__(
task_name=task_name,
seed=seed,
workload=workload,
budget_type=budget_type,
budget=budget,
)
def objective_function(
self,
configuration: Dict,
fidelity: Dict = None,
seed: Union[np.random.RandomState, int, None] = None,
**kwargs,
) -> Dict:
X = np.array([[configuration[k] for k in configuration.keys()]])
X = self.stretch * (X - self.shift - 0.238)
n = X.shape[0]
d = X.shape[1]
y = np.zeros(shape=(n,), dtype=self.dtype)
for i in range(d):
y += np.abs(X[:, i]) ** (i + 1)
# y += self.noise(n)
results = {list(self.objective_info.keys())[0]: float(y)}
for fd_name in self.fidelity_space.fidelity_names:
results[fd_name] = fidelity[fd_name]
return results
def get_configuration_space(self) -> SearchSpace:
variables = [Continuous(f'x{i}', (-1.0, 1.0)) for i in range(self.input_dim)]
ss = SearchSpace(variables)
return ss
@problem_registry.register("StyblinskiTang")
class StyblinskiTangOptBenchmark(SyntheticProblemBase):
def __init__(
self, task_name, budget_type, budget, seed, workload, **kwargs
):
assert "params" in kwargs
parameters = kwargs["params"]
self.input_dim = parameters["input_dim"]
if "shift" in parameters:
self.shift = parameters["shift"]
else:
shift = np.random.random(size=(self.input_dim, 1)).T
self.shift = (shift * 2 - 1) * 0.02
if "stretch" in parameters:
self.stretch = parameters["stretch"]
else:
self.stretch = np.array([1] * self.input_dim, dtype=np.float64)
self.optimizers = tuple(self.shift - 2.903534)
self.dtype = np.float64
super(StyblinskiTangOptBenchmark, self).__init__(
task_name=task_name,
seed=seed,
workload=workload,
budget_type=budget_type,
budget=budget,
)
def objective_function(
self,
configuration: Dict,
fidelity: Dict = None,
seed: Union[np.random.RandomState, int, None] = None,
**kwargs,
) -> Dict:
X = np.array([[configuration[k] for k in configuration.keys()]])
X = self.stretch * (X - self.shift)
n = X.shape[0]
d = X.shape[1]
y = 0.5 * (X**4 - 16 * X**2 + 5 * X).sum(axis=1)
# y += self.noise(n)
results = {list(self.objective_info.keys())[0]: float(y)}
for fd_name in self.fidelity_space.fidelity_names:
results[fd_name] = fidelity[fd_name]
return results
def get_configuration_space(self) -> SearchSpace:
variables = [Continuous(f'x{i}', (-5.0, 5.0)) for i in range(self.input_dim)]
ss = SearchSpace(variables)
return ss
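The Styblinski-Tang sum above has its minimum at x_i ≈ -2.903534 in every dimension, with value ≈ -39.166 per dimension. A standalone sketch (illustrative name):

```python
import numpy as np

def styblinski_tang(x):
    # 0.5 * sum(x^4 - 16 x^2 + 5 x); minimum ≈ -39.166 per dimension.
    x = np.asarray(x, dtype=float)
    return float(0.5 * np.sum(x ** 4 - 16.0 * x ** 2 + 5.0 * x))
```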
@problem_registry.register("Powell")
class PowellOptBenchmark(SyntheticProblemBase):
def __init__(
self, task_name, budget_type, budget, seed, workload, **kwargs
):
assert "params" in kwargs
parameters = kwargs["params"]
self.input_dim = parameters["input_dim"]
if "shift" in parameters:
self.shift = parameters["shift"]
else:
shift = np.random.random(size=(self.input_dim, 1)).T
self.shift = (shift * 2 - 1) * 0.02
if "stretch" in parameters:
self.stretch = parameters["stretch"]
else:
self.stretch = np.array([1] * self.input_dim, dtype=np.float64)
self.optimizers = [tuple(0.0 for _ in range(self.input_dim))]
self.dtype = np.float64
super(PowellOptBenchmark, self).__init__(
task_name=task_name,
seed=seed,
workload=workload,
budget_type=budget_type,
budget=budget,
)
def objective_function(
self,
configuration: Dict,
fidelity: Dict = None,
seed: Union[np.random.RandomState, int, None] = None,
**kwargs,
) -> Dict:
X = np.array([[configuration[k] for k in configuration.keys()]])
X = self.stretch * (X - self.shift)
n = X.shape[0]
d = X.shape[1]
y = np.zeros_like(X[..., 0])
for i in range(self.input_dim // 4):
i_ = i + 1
part1 = (X[..., 4 * i_ - 4] + 10.0 * X[..., 4 * i_ - 3]) ** 2
part2 = 5.0 * (X[..., 4 * i_ - 2] - X[..., 4 * i_ - 1]) ** 2
part3 = (X[..., 4 * i_ - 3] - 2.0 * X[..., 4 * i_ - 2]) ** 4
part4 = 10.0 * (X[..., 4 * i_ - 4] - X[..., 4 * i_ - 1]) ** 4
y += part1 + part2 + part3 + part4
# y += self.noise(n)
results = {list(self.objective_info.keys())[0]: float(y)}
for fd_name in self.fidelity_space.fidelity_names:
results[fd_name] = fidelity[fd_name]
return results
def get_configuration_space(self) -> SearchSpace:
variables = [Continuous(f'x{i}', (-4.0, 5.0)) for i in range(self.input_dim)]
ss = SearchSpace(variables)
return ss
@problem_registry.register("DixonPrice")
class DixonPriceOptBenchmark(SyntheticProblemBase):
def __init__(
self, task_name, budget_type, budget, seed, workload, **kwargs
):
assert "params" in kwargs
parameters = kwargs["params"]
self.input_dim = parameters["input_dim"]
if "shift" in parameters:
self.shift = parameters["shift"]
else:
shift = np.random.random(size=(self.input_dim, 1)).T
self.shift = (shift * 2 - 1) * 0.02
if "stretch" in parameters:
self.stretch = parameters["stretch"]
else:
self.stretch = np.array([1] * self.input_dim, dtype=np.float64)
self.optimizers = [
tuple(
math.pow(2.0, -(1.0 - 2.0 ** (-(i - 1))))
for i in range(1, self.input_dim + 1)
)
]
self.dtype = np.float64
super(DixonPriceOptBenchmark, self).__init__(
task_name=task_name,
seed=seed,
workload=workload,
budget_type=budget_type,
budget=budget,
)
def objective_function(
self,
configuration: Dict,
fidelity: Dict = None,
seed: Union[np.random.RandomState, int, None] = None,
**kwargs,
) -> Dict:
X = np.array([[configuration[k] for k in configuration.keys()]])
X = self.stretch * (X - self.shift)
n = X.shape[0]
d = X.shape[1]
part1 = (X[..., 0] - 1) ** 2
i = np.arange(start=2, stop=d + 1, step=1)
i = np.tile(i, (n, 1))
part2 = np.sum(i * (2.0 * X[..., 1:] ** 2 - X[..., :-1]) ** 2, axis=1)
y = part1 + part2
# y += self.noise(n)
results = {list(self.objective_info.keys())[0]: float(y)}
for fd_name in self.fidelity_space.fidelity_names:
results[fd_name] = fidelity[fd_name]
return results
def get_configuration_space(self) -> SearchSpace:
variables = [Continuous(f'x{i}', (-10.0, 10.0)) for i in range(self.input_dim)]
ss = SearchSpace(variables)
return ss
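The `optimizers` attribute above uses the closed-form Dixon-Price minimizer x_i = 2^(-(1 - 2^(1-i))), at which the function value is exactly 0. A standalone sketch (illustrative names) that checks the two agree:

```python
import numpy as np

def dixon_price(x):
    # (x_1 - 1)^2 + sum_{i=2..d} i * (2 x_i^2 - x_{i-1})^2
    x = np.asarray(x, dtype=float)
    i = np.arange(2, x.size + 1)
    return float((x[0] - 1.0) ** 2
                 + np.sum(i * (2.0 * x[1:] ** 2 - x[:-1]) ** 2))

def dixon_price_optimum(d):
    # Closed-form minimizer, matching the expression used above.
    return np.array([2.0 ** (-(1.0 - 2.0 ** (-(i - 1))))
                     for i in range(1, d + 1)])
```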
@problem_registry.register("cp")
class cpOptBenchmark(SyntheticProblemBase):
def __init__(
self, task_name, budget_type, budget, seed, workload, **kwargs
):
assert "params" in kwargs
parameters = kwargs["params"]
self.input_dim = parameters["input_dim"]
if "shift" in parameters:
self.shift = parameters["shift"]
else:
shift = np.random.random(size=(self.input_dim, 1)).T
self.shift = (shift * 2 - 1) * 0.02
if "stretch" in parameters:
self.stretch = parameters["stretch"]
else:
self.stretch = np.array([1] * self.input_dim, dtype=np.float64)
self.optimizers = [
tuple(
math.pow(2.0, -(1.0 - 2.0 ** (-(i - 1))))
for i in range(1, self.input_dim + 1)
)
]
self.dtype = np.float64
super(cpOptBenchmark, self).__init__(
task_name=task_name,
seed=seed,
workload=workload,
budget_type=budget_type,
budget=budget,
)
def objective_function(
self,
configuration: Dict,
fidelity: Dict = None,
seed: Union[np.random.RandomState, int, None] = None,
**kwargs,
) -> Dict:
X = np.array([configuration[k] for k in configuration.keys()])
part1 = np.sin(6 * X[0]) + X[1] ** 2
part2 = 0.1 * X[0] ** 2 + 0.1 * X[1] ** 2
if self.task_id == 1:
part3 = 0.1 * ((3) * (X[0] + 0.3)) ** 2 + 0.1 * ((3) * (X[1] + 0.3)) ** 2
else:
part3 = 0.1 * ((3) * (X[0] - 0.3)) ** 2 + 0.1 * ((3) * (X[1] - 0.3)) ** 2
y = part1 + part3 + part2
results = {list(self.objective_info.keys())[0]: float(y)}
for fd_name in self.fidelity_space.fidelity_names:
results[fd_name] = fidelity[fd_name]
return results
def get_configuration_space(self) -> SearchSpace:
variables = [Continuous(f'x{i}', (-1.0, 1.0)) for i in range(self.input_dim)]
ss = SearchSpace(variables)
return ss
@problem_registry.register("mpb")
class mpbOptBenchmark(SyntheticProblemBase):
def __init__(
self, task_name, budget_type, budget, seed, workload, **kwargs
):
assert "params" in kwargs
parameters = kwargs["params"]
self.input_dim = parameters["input_dim"]
if "shift" in parameters:
self.shift = parameters["shift"]
else:
shift = np.random.random(size=(self.input_dim, 1)).T
self.shift = (shift * 2 - 1) * 0.02
if "stretch" in parameters:
self.stretch = parameters["stretch"]
else:
self.stretch = np.array([1] * self.input_dim, dtype=np.float64)
self.optimizers = [
tuple(
math.pow(2.0, -(1.0 - 2.0 ** (-(i - 1))))
for i in range(1, self.input_dim + 1)
)
]
self.dtype = np.float64
super(mpbOptBenchmark, self).__init__(
task_name=task_name,
seed=seed,
workload=workload,
budget_type=budget_type,
budget=budget,
)
def objective_function(
self,
configuration: Dict,
fidelity: Dict = None,
seed: Union[np.random.RandomState, int, None] = None,
**kwargs,
) -> Dict:
X = np.array(
[[configuration[k] for idx, k in enumerate(configuration.keys())]]
)[0]
n_peak = 2
# The original np.ndarray(...) call on a ragged list raises at runtime;
# the two peak centers below are assumed from its first two entries.
self.peak = np.array([[-0.5, -0.5], [0.2, 0.2]])
# The original per-task branches were identical, so compute the distance
# from X to every peak once.
distance = np.linalg.norm(np.tile(X, (n_peak, 1)) - self.peak, axis=1)
# height and width are never set in __init__; unit defaults are assumed here.
height = getattr(self, "height", 1.0)
width = getattr(self, "width", 1.0)
y = np.max(height - width * distance)
return {"f1": float(y), "info": {"fidelity": fidelity}}
def get_configuration_space(self) -> SearchSpace:
variables = [Continuous(f'x{i}', (-32.768, 32.768)) for i in range(self.input_dim)]
ss = SearchSpace(variables)
return ss
@problem_registry.register("Ackley")
class Ackley(SyntheticProblemBase):
def __init__(
self, task_name, budget_type, budget, seed, workload, **kwargs
):
assert "params" in kwargs
parameters = kwargs["params"]
self.input_dim = parameters["input_dim"]
if "shift" in parameters:
self.shift = parameters["shift"]
else:
shift = np.random.random(size=(self.input_dim, 1)).T
self.shift = (shift * 2 - 1) * 0.02
if "stretch" in parameters:
self.stretch = parameters["stretch"]
else:
self.stretch = np.array([1] * self.input_dim, dtype=np.float64)
self.optimizers = tuple(self.shift - 12)
self.dtype = np.float64
self.a = np.array([20], dtype=self.dtype)
self.b = np.array([0.2], dtype=self.dtype)
self.c = np.array([0.3 * math.pi], dtype=self.dtype)
super(Ackley, self).__init__(
task_name=task_name,
seed=seed,
workload=workload,
budget_type=budget_type,
budget=budget,
)
def objective_function(
self,
configuration: Dict,
fidelity: Dict = None,
seed: Union[np.random.RandomState, int, None] = None,
**kwargs,
) -> Dict:
X = np.array([[configuration[k] for idx, k in enumerate(configuration.keys())]])
X = self.stretch * (X - self.shift - 0.73)
n = X.shape[0]
d = X.shape[1]
a, b, c = self.a, self.b, self.c
part1 = -a * np.exp(-b / math.sqrt(d) * np.linalg.norm(X, axis=-1))
part2 = -(np.exp(np.mean(np.cos(c * X), axis=-1)))
y = part1 + part2 + a + math.e
results = {list(self.objective_info.keys())[0]: float(y)}
for fd_name in self.fidelity_space.fidelity_names:
results[fd_name] = (fidelity or {}).get(fd_name)
return results
def get_configuration_space(self) -> SearchSpace:
variables = [Continuous(f'x{i}', (-32.768, 32.768)) for i in range(self.input_dim)]
ss = SearchSpace(variables)
return ss
@problem_registry.register("Ellipsoid")
class EllipsoidOptBenchmark(SyntheticProblemBase):
def __init__(
self, task_name, budget_type, budget, seed, workload, **kwargs
):
assert "params" in kwargs
parameters = kwargs["params"]
self.input_dim = parameters["input_dim"]
if "shift" in parameters:
self.shift = parameters["shift"]
else:
shift = np.random.random(size=(self.input_dim, 1)).T
self.shift = (shift * 2 - 1) * 0.02
if "stretch" in parameters:
self.stretch = parameters["stretch"]
else:
self.stretch = np.array([1] * self.input_dim, dtype=np.float64)
self.optimizers = tuple(self.shift)
self.dtype = np.float64
self.condition = 1e6
super(EllipsoidOptBenchmark, self).__init__(
task_name=task_name,
seed=seed,
workload=workload,
budget_type=budget_type,
budget=budget,
)
def objective_function(
self,
configuration: Dict,
fidelity: Dict = None,
seed: Union[np.random.RandomState, int, None] = None,
**kwargs,
) -> Dict:
X = np.array([[configuration[k] for idx, k in enumerate(configuration.keys())]])
X = self.stretch * (X - self.shift)
n = X.shape[0]
d = X.shape[1]
y = np.array([])
for x in X:
temp = x[0] * x[0]
for i in range(1, d):
# `exponent` was undefined in the original; the standard ellipsoid uses
# condition ** (i / (d - 1)) so the scaling grows across dimensions.
temp += pow(self.condition, i / (d - 1)) * x[i] * x[i]
y = np.append(y, temp)
results = {list(self.objective_info.keys())[0]: float(y)}
for fd_name in self.fidelity_space.fidelity_names:
results[fd_name] = (fidelity or {}).get(fd_name)
return results
def get_configuration_space(self) -> SearchSpace:
variables = [Continuous(f'x{i}', (-5.0, 5.0)) for i in range(self.input_dim)]
ss = SearchSpace(variables)
return ss
@problem_registry.register("Discus")
class DiscusOptBenchmark(SyntheticProblemBase):
def __init__(
self, task_name, budget_type, budget, seed, workload, **kwargs
):
assert "params" in kwargs
parameters = kwargs["params"]
self.input_dim = parameters["input_dim"]
if "shift" in parameters:
self.shift = parameters["shift"]
else:
shift = np.random.random(size=(self.input_dim, 1)).T
self.shift = (shift * 2 - 1) * 0.02
if "stretch" in parameters:
self.stretch = parameters["stretch"]
else:
self.stretch = np.array([1] * self.input_dim, dtype=np.float64)
self.optimizers = tuple(self.shift)
self.dtype = np.float64
self.condition = 1e6
super(DiscusOptBenchmark, self).__init__(
task_name=task_name,
seed=seed,
workload=workload,
budget_type=budget_type,
budget=budget,
)
def objective_function(
self,
configuration: Dict,
fidelity: Dict = None,
seed: Union[np.random.RandomState, int, None] = None,
**kwargs,
) -> Dict:
X = np.array([[configuration[k] for idx, k in enumerate(configuration.keys())]])
X = self.stretch * (X - self.shift)
n = X.shape[0]
d = X.shape[1]
y = np.array([])
for x in X:
temp = self.condition * x[0] * x[0]
for i in range(1, d):
temp += x[i] * x[i]
y = np.append(y, temp)
results = {list(self.objective_info.keys())[0]: float(y)}
for fd_name in self.fidelity_space.fidelity_names:
results[fd_name] = (fidelity or {}).get(fd_name)
return results
def get_configuration_space(self) -> SearchSpace:
variables = [Continuous(f'x{i}', (-5.0, 5.0)) for i in range(self.input_dim)]
ss = SearchSpace(variables)
return ss
@problem_registry.register("BentCigar")
class BentCigarOptBenchmark(SyntheticProblemBase):
def __init__(
self, task_name, budget_type, budget, seed, workload, **kwargs
):
assert "params" in kwargs
parameters = kwargs["params"]
self.input_dim = parameters["input_dim"]
if "shift" in parameters:
self.shift = parameters["shift"]
else:
shift = np.random.random(size=(self.input_dim, 1)).T
self.shift = (shift * 2 - 1) * 0.02
if "stretch" in parameters:
self.stretch = parameters["stretch"]
else:
self.stretch = np.array([1] * self.input_dim, dtype=np.float64)
self.optimizers = tuple(self.shift)
self.dtype = np.float64
self.condition = 1e6
super(BentCigarOptBenchmark, self).__init__(
task_name=task_name,
seed=seed,
workload=workload,
budget_type=budget_type,
budget=budget,
)
def objective_function(
self,
configuration: Dict,
fidelity: Dict = None,
seed: Union[np.random.RandomState, int, None] = None,
**kwargs,
) -> Dict:
X = np.array([[configuration[k] for idx, k in enumerate(configuration.keys())]])
X = self.stretch * (X - self.shift)
n = X.shape[0]
d = X.shape[1]
y = np.array([])
for x in X:
temp = x[0] * x[0]
for i in range(1, d):
temp += self.condition * x[i] * x[i]
y = np.append(y, temp)
results = {list(self.objective_info.keys())[0]: float(y)}
for fd_name in self.fidelity_space.fidelity_names:
results[fd_name] = (fidelity or {}).get(fd_name)
return results
def get_configuration_space(self) -> SearchSpace:
variables = [Continuous(f'x{i}', (-5.0, 5.0)) for i in range(self.input_dim)]
ss = SearchSpace(variables)
return ss
@problem_registry.register("SharpRidge")
class SharpRidgeOptBenchmark(SyntheticProblemBase):
def __init__(
self, task_name, budget_type, budget, seed, workload, **kwargs
):
assert "params" in kwargs
parameters = kwargs["params"]
self.input_dim = parameters["input_dim"]
if "shift" in parameters:
self.shift = parameters["shift"]
else:
shift = np.random.random(size=(self.input_dim, 1)).T
self.shift = (shift * 2 - 1) * 0.02
if "stretch" in parameters:
self.stretch = parameters["stretch"]
else:
self.stretch = np.array([1] * self.input_dim, dtype=np.float64)
self.optimizers = tuple(self.shift)
self.dtype = np.float64
self.alpha = 100.0
super(SharpRidgeOptBenchmark, self).__init__(
task_name=task_name,
seed=seed,
workload=workload,
budget_type=budget_type,
budget=budget,
)
def objective_function(
self,
configuration: Dict,
fidelity: Dict = None,
seed: Union[np.random.RandomState, int, None] = None,
**kwargs,
) -> Dict:
X = np.array([[configuration[k] for idx, k in enumerate(configuration.keys())]])
X = self.stretch * (X - self.shift)
n = X.shape[0]
d = X.shape[1]
d_vars_40 = d / 40.0
vars_40 = int(math.ceil(d_vars_40))
y = np.array([])
for x in X:
temp = 0
for i in range(vars_40, d):
temp += x[i] * x[i]
temp = self.alpha * math.sqrt(temp / d_vars_40)
for i in range(vars_40):
temp += x[i] * x[i] / d_vars_40
y = np.append(y, temp)
results = {list(self.objective_info.keys())[0]: float(y)}
for fd_name in self.fidelity_space.fidelity_names:
results[fd_name] = (fidelity or {}).get(fd_name)
return results
def get_configuration_space(self) -> SearchSpace:
variables = [Continuous(f'x{i}', (-5.0, 5.0)) for i in range(self.input_dim)]
ss = SearchSpace(variables)
return ss
@problem_registry.register("GriewankRosenbrock")
class GriewankRosenbrockOptBenchmark(SyntheticProblemBase):
def __init__(
self, task_name, budget_type, budget, seed, workload, **kwargs
):
assert "params" in kwargs
parameters = kwargs["params"]
self.input_dim = parameters["input_dim"]
if "shift" in parameters:
self.shift = parameters["shift"]
else:
shift = np.random.random(size=(self.input_dim, 1)).T
self.shift = (shift * 2 - 1) * 0.02
if "stretch" in parameters:
self.stretch = parameters["stretch"]
else:
self.stretch = np.array([1] * self.input_dim, dtype=np.float64)
self.optimizers = tuple(self.shift)
self.dtype = np.float64
super(GriewankRosenbrockOptBenchmark, self).__init__(
task_name=task_name,
seed=seed,
workload=workload,
budget_type=budget_type,
budget=budget,
)
def objective_function(
self,
configuration: Dict,
fidelity: Dict = None,
seed: Union[np.random.RandomState, int, None] = None,
**kwargs,
) -> Dict:
X = np.array([[configuration[k] for idx, k in enumerate(configuration.keys())]])
X = self.stretch * (X - self.shift)
n = X.shape[0]
d = X.shape[1]
y = np.array([])
for x in X:
temp = 0
for i in range(len(x) - 1):
temp1 = x[i] * x[i] - x[i + 1]
temp2 = 1.0 - x[i]
temp3 = 100.0 * temp1**2 + temp2**2
temp += temp3 / 4000.0 - math.cos(temp3)
y = np.append(y, temp)
results = {list(self.objective_info.keys())[0]: float(y)}
for fd_name in self.fidelity_space.fidelity_names:
results[fd_name] = (fidelity or {}).get(fd_name)
return results
def get_configuration_space(self) -> SearchSpace:
variables = [Continuous(f'x{i}', (-5.0, 5.0)) for i in range(self.input_dim)]
ss = SearchSpace(variables)
return ss
@problem_registry.register("Katsuura")
class KatsuuraOptBenchmark(SyntheticProblemBase):
def __init__(
self, task_name, budget_type, budget, seed, workload, **kwargs
):
assert "params" in kwargs
parameters = kwargs["params"]
self.input_dim = parameters["input_dim"]
if "shift" in parameters:
self.shift = parameters["shift"]
else:
shift = np.random.random(size=(self.input_dim, 1)).T
self.shift = (shift * 2 - 1) * 0.02
if "stretch" in parameters:
self.stretch = parameters["stretch"]
else:
self.stretch = np.array([1] * self.input_dim, dtype=np.float64)
self.optimizers = tuple(self.shift)
self.dtype = np.float64
super(KatsuuraOptBenchmark, self).__init__(
task_name=task_name,
seed=seed,
workload=workload,
budget_type=budget_type,
budget=budget,
)
def objective_function(
self,
configuration: Dict,
fidelity: Dict = None,
seed: Union[np.random.RandomState, int, None] = None,
**kwargs,
) -> Dict:
X = np.array([[configuration[k] for idx, k in enumerate(configuration.keys())]])
X = self.stretch * (X - self.shift)
n = X.shape[0]
d = X.shape[1]
y = np.array([])
for x in X:
result = 1.0
for i in range(len(x)):
temp = 0.0
for j in range(1, 33):
temp1 = 2.0**j
temp += abs(temp1 * x[i] - round(temp1 * x[i])) / temp1
temp = 1.0 + (i + 1) * temp
result *= temp ** (10.0 / (len(x) ** 1.2))
y = np.append(y, result)
results = {list(self.objective_info.keys())[0]: float(y)}
for fd_name in self.fidelity_space.fidelity_names:
results[fd_name] = (fidelity or {}).get(fd_name)
return results
def get_configuration_space(self) -> SearchSpace:
variables = [Continuous(f'x{i}', (-5.0, 5.0)) for i in range(self.input_dim)]
ss = SearchSpace(variables)
return ss
def visualize_function(func_name, n_points=100):
"""Visualize synthetic benchmark functions in 1D and 2D.
Args:
func_name (str): Name of the benchmark function
n_points (int): Number of points for visualization
"""
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# Create benchmark instance
params = {"input_dim": 2} # We'll use 2D for visualization
benchmark = problem_registry.get(func_name)(
task_name="visualization",
budget_type="time",
budget=100,
seed=42,
workload=None,
params=params
)
# Create figure
fig = plt.figure(figsize=(15, 5))
# 1D plot: use set_position to adjust the axes' location and size
# [left, bottom, width, height]
ax1 = fig.add_subplot(121)
ax1.set_position([0.05, 0.15, 0.35, 0.7])  # position and size of the left plot
x = np.linspace(-5, 5, n_points)
y = []
for xi in x:
config = {"x0": xi, "x1": 0.0}
result = benchmark.objective_function(config)
y.append(result[list(result.keys())[0]])
ax1.plot(x, y, 'b-', linewidth=2)
ax1.set_title(f'{func_name} Function (1D)')
ax1.set_xlabel('x')
ax1.set_ylabel('f(x)')
ax1.grid(True)
# 2D plot: use set_position to adjust the axes' location and size
ax2 = fig.add_subplot(122, projection='3d')
ax2.set_position([0.5, 0.1, 0.45, 0.8])  # position and size of the right plot
x = np.linspace(-5, 5, n_points)
y = np.linspace(-5, 5, n_points)
X, Y = np.meshgrid(x, y)
Z = np.zeros_like(X)
for i in range(n_points):
for j in range(n_points):
config = {"x0": X[i,j], "x1": Y[i,j]}
result = benchmark.objective_function(config)
Z[i,j] = result[list(result.keys())[0]]
surf = ax2.plot_surface(X, Y, Z, cmap='viridis',
linewidth=0, antialiased=True)
fig.colorbar(surf, ax=ax2, shrink=0.5, aspect=5)
ax2.set_title(f'{func_name} Function (2D)')
ax2.set_xlabel('x')
ax2.set_ylabel('y')
ax2.set_zlabel('f(x,y)')
plt.savefig(f'{func_name}.png', bbox_inches='tight', dpi=300)
plt.close()
# Example usage:
if __name__ == "__main__":
# Test visualization with some benchmark functions
functions = ["Sphere", "Rastrigin", "Ackley"]
for func in functions:
visualize_function(func)
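For reference, the shifted and stretched Ackley evaluation implemented by `Ackley.objective_function` above can be reduced to a self-contained sketch. This is a simplified stand-in, not the class itself: the fixed `-0.73` offset and the benchmark plumbing (configuration dicts, fidelity handling) are dropped, and the `a`, `b`, `c` defaults mirror the values set in `__init__` (note the non-canonical `c = 0.3 * pi`).

```python
import math
import numpy as np

def ackley(x, shift=None, stretch=None, a=20.0, b=0.2, c=0.3 * math.pi):
    """Shifted/stretched Ackley value for a single point x (simplified sketch)."""
    x = np.asarray(x, dtype=np.float64)
    d = x.shape[-1]
    shift = np.zeros(d) if shift is None else np.asarray(shift)
    stretch = np.ones(d) if stretch is None else np.asarray(stretch)
    z = stretch * (x - shift)
    # -b * sqrt(mean(z^2)) written as -b / sqrt(d) * ||z||, as in the class.
    part1 = -a * np.exp(-b / math.sqrt(d) * np.linalg.norm(z, axis=-1))
    part2 = -np.exp(np.mean(np.cos(c * z), axis=-1))
    return float(part1 + part2 + a + math.e)

# With no shift/stretch the global minimum sits at the origin, value ~0.
print(ackley([0.0, 0.0]))
print(ackley([1.0, 1.0]))
```

With `shift` and `stretch` supplied, the minimum moves to `shift`, which is why the benchmark classes record `self.shift`-derived optimizers.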
================================================
FILE: transopt/datamanager/__init__.py
================================================
================================================
FILE: transopt/datamanager/database.py
================================================
import atexit
import json
import queue
import sqlite3
import time
from multiprocessing import Event, Manager, Process, Queue
from typing import Union
import numpy as np
import pandas as pd
from transopt.utils.log import logger
from transopt.utils.path import get_library_path
"""
Descriptions of the reserved database tables.
"""
table_descriptions = {
"_config": """
name varchar(200) not null,
config text not null,
is_experiment boolean not null default TRUE
""",
"_metadata": """
table_name varchar(255) not null,
problem_name varchar(255) not null,
dimensions int,
objectives int,
fidelities text,
workloads int,
budget_type varchar(50),
budget int,
seeds int,
space_refiner varchar(50),
sampler varchar(50),
pretrain varchar(50),
model varchar(50),
acf varchar(50),
normalizer varchar(50),
dataset_selectors json,
PRIMARY KEY (table_name)
""",
}
class DatabaseDaemon:
def __init__(self, data_path, task_queue, result_queue, stop_event):
self.data_path = data_path
self.task_queue = task_queue
self.result_queue = result_queue
self.stop_event = stop_event
def run(self):
with sqlite3.connect(self.data_path) as conn:
cursor = conn.cursor()
while not self.stop_event.is_set():
task = self.task_queue.get()  # Blocks until a task is queued
if task is None: # Sentinel for stopping
break
func, args, commit = task
try:
result = func(cursor, *args)
if commit:
conn.commit()
self.result_queue.put(("SUCCESS", result))
except Exception as e:
conn.rollback()
logger.error(
f"Database operation failed: {e}", exc_info=True
)
self.result_queue.put(("FAILURE", e))
class Database:
def __init__(self, db_file_name="database.db"):
self.data_path = get_library_path() / db_file_name
manager = Manager()
self.task_queue = manager.Queue()
self.result_queue = manager.Queue()
self.lock = manager.Lock()
self.transaction_lock = manager.Lock()
self.stop_event = manager.Event()
self.process = Process(
target=DatabaseDaemon(
self.data_path, self.task_queue, self.result_queue, self.stop_event
).run
)
self.process.start()
atexit.register(self.close)
# reserved tables
self.reserved_tables = list(table_descriptions.keys())
for name, desc in table_descriptions.items():
if not self.check_table_exist(name):
self.execute(f'CREATE TABLE "{name}" ({desc})')
def close(self):
self.stop_event.set()
self.task_queue.put(None)
self.process.join()
def _execute(self, task, args=(), timeout=None, commit=True):
self.task_queue.put((task, args, commit))
try:
status, result = self.result_queue.get(timeout=timeout)
if status == "SUCCESS":
return result
else:
raise result # Re-raise the exception from the daemon
except queue.Empty:
raise Exception("Task execution timed out or failed")
@staticmethod
def query_exec(cursor, query, params, fetchone, fetchall, many):
if many:
cursor.executemany(query, params or [])
else:
cursor.execute(query, params or ())
if fetchone:
return cursor.fetchone()
if fetchall:
return cursor.fetchall()
return None
def execute(
self,
query,
params=None,
fetchone=False,
fetchall=False,
timeout=None,
commit=True,
):
with self.lock:
return self._execute(
Database.query_exec,
(query, params, fetchone, fetchall, False),
timeout,
commit
)
def executemany(
self,
query,
params=None,
fetchone=False,
fetchall=False,
timeout=None,
commit=True,
):
with self.lock:
return self._execute(
Database.query_exec,
(query, params, fetchone, fetchall, True),
timeout,
commit
)
def start_transaction(self):
self.execute("BEGIN", commit=False)
def commit_transaction(self):
self.execute("COMMIT", commit=False)
def rollback_transaction(self):
self.execute("ROLLBACK", commit=False)
"""
table
"""
def get_experiment_datasets(self):
"""Get the list of all tables that are marked as experiment datasets."""
experiment_datasets = self.execute(
"SELECT name FROM _config WHERE is_experiment = TRUE", fetchall=True
)
return [
table[0]
for table in experiment_datasets
if table[0] not in self.reserved_tables
]
def get_all_datasets(self):
"""Get the list of all tables and indicate which ones are experiment datasets."""
all_datasets = self.execute(
"""SELECT name, is_experiment FROM _config""", fetchall=True
)
return [
table[0] for table in all_datasets if table[0] not in self.reserved_tables
]
def get_table_list(self):
"""Get the list of all database tables."""
table_list = self.execute(
"SELECT name FROM sqlite_master WHERE type='table'", fetchall=True
)
return [
table[0] for table in table_list if table[0] not in self.reserved_tables
]
def check_table_exist(self, name):
"""Check if a certain database table exists."""
table_exists = self.execute(
"SELECT name FROM sqlite_master WHERE type='table' AND name=?",
params=(name,),
fetchone=True,
)
return table_exists is not None
def create_table(self, name, dataset_cfg, overwrite=False, is_experiment=True):
"""
Create and initialize a database table based on problem configuration.
Parameters
----------
name: str
Name of the table to create and initialize.
dataset_cfg: dict
Configuration for the table schema.
overwrite : bool, optional
Flag to determine whether to overwrite the existing table, default is False.
is_experiment : bool, optional
Flag to denote if the table is for experimental use, default is True.
"""
if self.check_table_exist(name):
if overwrite:
self.remove_table(name)
else:
raise Exception(f"Table {name} already exists")
variables = dataset_cfg.get("variables", [])
objectives = dataset_cfg.get("objectives", [])
fidelities = dataset_cfg.get("fidelities", [])
var_type_map = {
"continuous": "float",
"log_continuous": "float",
"integer": "int",
"large_integer": "text", # Store large integers as text to handle very large values
"int_exponent": "int",
"exp2": "int",
"categorical": "varchar(50)",
# 'binary': 'boolean',
}
# description = ['status varchar(20) not null default "unevaluated"']
description = []
for var_info in variables:
description.append(
f'"{var_info["name"]}" {var_type_map[var_info["type"]]} not null'
)
for obj_info in objectives:
description.append(f'"{obj_info["name"]}" float')
for fid_info in fidelities:
description.append(
f'"{fid_info["name"]}" {var_type_map[fid_info["type"]]} not null'
)
description += [
"batch int default -1",
"error boolean default 0",
# "pareto boolean",
# "batch int not null",
# "order int default -1",
# "hypervolume float",
]
with self.transaction_lock:
try:
self.start_transaction()
# Create the table
self.execute(f'CREATE TABLE "{name}" ({",".join(description)})', commit=False)
# Optionally, create indexes on certain columns
index_columns = [var["name"] for var in variables] + [
fid["name"] for fid in fidelities if fid.get("index", False)
]
if index_columns:
index_statement = ", ".join([f'"{col}"' for col in index_columns])
self.execute(f'CREATE INDEX "idx_{name}" ON "{name}" ({index_statement})', commit=False)
self.create_or_update_config(name, dataset_cfg, is_experiment, commit=False)
if "additional_config" in dataset_cfg:
self.create_or_update_metadata(name, dataset_cfg["additional_config"], commit=False)
self.commit_transaction()
except Exception as e:
self.rollback_transaction() # Rollback if an error occurred
raise e
def remove_table(self, name):
if not self.check_table_exist(name):
raise Exception(f"Table {name} does not exist")
with self.transaction_lock:
try:
self.start_transaction()
self.execute("DELETE FROM _config WHERE name = ?", params=(name,), commit=False)
self.execute("DELETE FROM _metadata WHERE table_name = ?", params=(name,), commit=False)
self.execute(f'DROP TABLE IF EXISTS "{name}"', commit=False)
self.commit_transaction()
except Exception as e:
self.rollback_transaction()
raise e
"""
config
"""
def create_or_update_config(self, name, dataset_cfg, is_experiment=True, commit=True):
"""
Create or update a configuration entry in the _config table for a given table.
"""
# Serialize dataset_cfg into JSON format
config_json = json.dumps(dataset_cfg)
# Check if the configuration already exists
if self.query_config(name) is not None:
# Update the existing configuration
self.execute(
"UPDATE _config SET config = ?, is_experiment = ? WHERE name = ?",
(config_json, is_experiment, name),
commit=commit
)
else:
# Insert a new configuration
self.execute(
"INSERT INTO _config (name, config, is_experiment) VALUES (?, ?, ?)",
(name, config_json, is_experiment),
commit=commit
)
def query_config(self, name):
config_json = self.execute(
"SELECT config FROM _config WHERE name=?", params=(name,), fetchone=True
)
if config_json is None:
return None
else:
return json.loads(config_json[0])
def query_dataset_info(self, name):
"""
Query the dataset information of a given table.
"""
config = self.query_config(name)
if config is None:
return None
variables = config["variables"]
objectives = config["objectives"]
fidelities = config["fidelities"]
num_rows = self.get_num_row(name)
dataset_info = {
"num_variables": len(variables),
"num_objectives": len(objectives),
"num_fidelities": len(fidelities),
"data_number": num_rows,
**config,
}
return dataset_info
def create_or_update_metadata(self, table_name, metadata, commit=True):
"""
Create or update a metadata entry in the _metadata table for a given table.
"""
dataset_selectors_json = json.dumps(metadata.get("DatasetSelectors", {}))
problem_name = metadata.get("problem_name", "")
self.execute(
f"""
INSERT INTO _metadata (
table_name, problem_name, dimensions, objectives, fidelities, workloads, budget_type, budget, seeds,
space_refiner, sampler, pretrain, model, acf, normalizer, dataset_selectors
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT (table_name) DO UPDATE SET
problem_name = EXCLUDED.problem_name, dimensions = EXCLUDED.dimensions, objectives = EXCLUDED.objectives,
fidelities = EXCLUDED.fidelities, workloads = EXCLUDED.workloads, budget_type = EXCLUDED.budget_type,
budget = EXCLUDED.budget, seeds = EXCLUDED.seeds, space_refiner = EXCLUDED.space_refiner,
sampler = EXCLUDED.sampler, pretrain = EXCLUDED.pretrain, model = EXCLUDED.model,
acf = EXCLUDED.acf, normalizer = EXCLUDED.normalizer, dataset_selectors = EXCLUDED.dataset_selectors
""",
(
table_name,
problem_name,
metadata.get("dim", 0),
metadata.get("obj", 0),
metadata.get("fidelity", ""),
metadata.get("workloads", 0),
metadata.get("budget_type", ""),
metadata.get("budget", 0),
metadata.get("seeds", 0),
metadata.get("SpaceRefiner", ""),
metadata.get("Sampler", ""),
metadata.get("Pretrain", ""),
metadata.get("Model", ""),
metadata.get("ACF", ""),
metadata.get("Normalizer", ""),
dataset_selectors_json,
),
commit=commit
)
def get_all_metadata(self):
"""
Get the metadata for all tables in the database.
"""
metadata = self.execute("SELECT * FROM _metadata", fetchall=True)
return metadata
def search_tables_by_metadata(self, search_params):
"""
Search for tables based on metadata criteria.
Parameters:
----------
search_params : dict
A dictionary where keys are metadata column names and values are the criteria values.
Returns:
-------
list of str
A list of table names that match the search criteria.
"""
if not search_params:
raise ValueError("Search parameters are required")
# Constructing the WHERE clause dynamically based on the provided search parameters
where_clause = self._get_conditions(conditions=search_params)
query = f"SELECT table_name FROM _metadata{where_clause}"
result = self.execute(query, fetchall=True)
return [row[0] for row in result]
"""
basic operations
"""
def insert_data(
self, table, data: Union[dict, list, pd.DataFrame, np.ndarray]
) -> list:
"""
Insert single-row or multiple-row data into the database.
Parameters
----------
table: str
Name of the database table to insert into.
data: dict, list, pd.DataFrame, or np.ndarray
Data to insert. If a dictionary, it represents a single row of data
where keys are column names and values are data values. If a list,
each element represents a row (as a list or dict). If a DataFrame or
np.ndarray, each row represents a row to be inserted.
Returns
-------
list
List of row numbers of the inserted rows.
"""
if isinstance(data, dict):
# Single row insertion from dict
columns = list(data.keys())
values = [list(data.values())]
elif isinstance(data, list):
# Multiple row insertion from list of dicts or lists
if all(isinstance(row, dict) for row in data):
columns = list(data[0].keys())
values = [list(row.values()) for row in data]
elif all(isinstance(row, list) for row in data):
columns = None
values = data
else:
raise ValueError(
"All rows in data_list must be of the same type (all dicts or all lists)"
)
elif isinstance(data, (pd.DataFrame, np.ndarray)):
# Convert DataFrame or ndarray to list of lists for insertion
values = (
data.tolist() if isinstance(data, np.ndarray) else data.values.tolist()
)
columns = data.columns.tolist() if isinstance(data, pd.DataFrame) else None
else:
raise ValueError(
"Data parameter must be a dictionary, list, pandas DataFrame, or numpy ndarray"
)
if columns:
column_str = ",".join([f'"{col}"' for col in columns])
value_placeholders = ",".join(["?"] * len(columns))
else:
column_str = ""
value_placeholders = ",".join(["?"] * len(values[0]))
query = f'INSERT INTO "{table}" ({column_str}) VALUES ({value_placeholders})'
self.executemany(query, values)
# Get the rowids of the inserted rows
n_row = self.get_num_row(table)
len_data = len(data) if isinstance(data, list) else len(values)
return list(range(n_row - len_data + 1, n_row + 1))
def _get_conditions(self, rowid=None, conditions=None):
"""
Construct SQL conditions for a query based on rowid and additional conditions.
Parameters
----------
rowid: int/list
Row number(s) of the table to query (if None then no rowid condition is added).
conditions: dict
Additional conditions for querying (key: column name, value: column value).
Returns
-------
str
SQL condition string.
"""
from collections.abc import Iterable
conditions_list = []
# Handling rowid conditions
if rowid is not None:
if isinstance(rowid, Iterable) and not isinstance(rowid, str):
rowid_condition = f'rowid IN ({",".join([str(r) for r in rowid])})'
conditions_list.append(rowid_condition)
else:
conditions_list.append(f"rowid = {rowid}")
# Handling additional conditions
if conditions:
for column, value in conditions.items():
if isinstance(value, str):
value_str = f"'{value}'" # Strings need to be quoted
else:
value_str = str(value)
condition_str = f'"{column}" = {value_str}'
conditions_list.append(condition_str)
# Combine all conditions with 'AND'
if conditions_list:
return " WHERE " + " AND ".join(conditions_list)
else:
return ""
def update_data(self, table, data, rowid=None, conditions=None):
"""
Update single-row or multiple-row data in the database.
Parameters
----------
table: str
Name of the database table to update.
data: dict or list of dicts
Data to update. If a dictionary, it represents a single row of data
where keys are column names and values are data values.
If a list, each dictionary in the list represents a row to be updated.
rowid: int/list
Row number(s) of the table to update. If None, conditions are used.
conditions: dict
Additional conditions for updating (key: column name, value: column value).
"""
if isinstance(data, dict):
data = [data]
update_values = []
for row in data:
columns = list(row.keys())
values = list(row.values())
set_clause = ", ".join([f'"{col}" = ?' for col in columns])
query = f'UPDATE "{table}" SET {set_clause}'
if rowid:
query += f" WHERE rowid = ?"
values.append(rowid)
elif conditions:
condition_str = " AND ".join([f'"{k}" = ?' for k in conditions.keys()])
query += f" WHERE {condition_str}"
values.extend(conditions.values())
else:
raise ValueError("Either rowid or conditions must be provided")
update_values.append(values)
self.executemany(query, update_values)
def delete_data(self, table, rowid=None, conditions=None):
"""
Delete single-row or multiple-row data in the database.
Parameters
----------
table: str
Name of the database table to delete from.
rowid: int/list
Row number(s) of the table to delete. If None, conditions are used.
conditions: dict
Additional conditions for deleting (key: column name, value: column value).
"""
query = f'DELETE FROM "{table}"'
condition = self._get_conditions(rowid=rowid, conditions=conditions)
query += condition
self.execute(query)
def select_data(
self, table, columns=None, rowid=None, conditions=None, as_dataframe=False
) -> Union[list, pd.DataFrame]:
"""
Select data in the database.
Parameters
----------
table: str
Name of the database table to query.
columns: str/list
Column name(s) of the table to query (if None then select all columns).
rowid: int/list
Row number(s) of the table to query (if None then select all rows).
conditions: dict
Additional conditions for querying (key: column name, value: column value).
as_dataframe: bool
If True, return the result as a pandas DataFrame.
Returns
-------
list of dicts
Selected data, each row as a dictionary with column names as keys.
"""
if columns is None:
query = f'SELECT * FROM "{table}"'
columns = self.get_column_names(table)
elif isinstance(columns, str):
query = f'SELECT "{columns}" FROM "{table}"'
columns = [columns]
else:
column_str = ",".join([f'"{col}"' for col in columns])
query = f'SELECT {column_str} FROM "{table}"'
condition = self._get_conditions(rowid=rowid, conditions=conditions)
query += condition
# Convert each tuple in the results to a list
results = self.execute(query, fetchall=True)
if as_dataframe:
return pd.DataFrame(results, columns=columns)
else:
# Convert large integer values from string to int if needed
converted_results = []
for row in results:
row_dict = dict(zip(columns, row))
for key, value in row_dict.items():
if isinstance(value, str) and value.isdigit() and key in self.large_integer_columns:
row_dict[key] = int(value)
converted_results.append(row_dict)
return converted_results
def get_num_row(self, table):
query = f'SELECT COUNT(*) FROM "{table}"'
return self.execute(query, fetchone=True)[0]
def get_column_names(self, table):
"""Get the column names of a database table."""
query = f'PRAGMA table_info("{table}")'
return [col[1] for col in self.execute(query, fetchall=True)]
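The parameterized WHERE-clause construction used by `delete_data` and `select_data` above can be illustrated with a minimal, self-contained `sqlite3` sketch. The table and columns here are hypothetical, and `build_conditions` is a simplified stand-in for the class's `_get_conditions` helper:

```python
import sqlite3

def build_conditions(rowid=None, conditions=None):
    # Quoted column names with "?" placeholders keep values out of the
    # SQL text, mirroring the pattern used in the Database class above.
    if rowid is not None:
        return " WHERE rowid = ?", [rowid]
    if conditions:
        clause = " AND ".join(f'"{k}" = ?' for k in conditions)
        return f" WHERE {clause}", list(conditions.values())
    return "", []

conn = sqlite3.connect(":memory:")
conn.execute('CREATE TABLE "demo" (name TEXT, score INTEGER)')
conn.executemany('INSERT INTO "demo" VALUES (?, ?)',
                 [("a", 1), ("b", 2), ("b", 3)])

where, values = build_conditions(conditions={"name": "b"})
rows = conn.execute(f'SELECT * FROM "demo"{where}', values).fetchall()
assert rows == [("b", 2), ("b", 3)]
```

Passing values separately rather than interpolating them into the query string is what makes this construction safe against SQL injection; only the (quoted) column names are formatted into the text.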
================================================
FILE: transopt/datamanager/lsh.py
================================================
import numpy as np
from collections import defaultdict
from transopt.datamanager.minhash import MinHasher
class LSHCache:
def __init__(self, hasher, num_bands=10):
"""
Initialize the LSH object with the specified number of bands and rows per band.
Parameters:
-----------
hasher: MinHasher
An object that computes minhashes for a given input text.
num_bands: int
The number of bands to divide the minhash signature matrix into.
"""
assert (
hasher.num_hashes % num_bands == 0
), "num_hashes must be divisible by num_bands"
self.buckets = [defaultdict(set) for _ in range(num_bands)]
self.hasher = hasher
self.band_width = hasher.num_hashes // num_bands
self.num_bands = num_bands
self.fingerprints = {}
def add(self, key, vector):
"""
Add a multidimensional vector to the cache.
Parameters:
-----------
key: any hashable
A unique identifier for the vector.
vector: tuple (str, str, int, int)
A tuple representing the multidimensional vector. The tuple format is:
(task_name, variable_names, num_variables, num_objectives)
"""
if vector is None:
return
# Compute a combined fingerprint for the string dimensions
combined_fp = []
for dimension in vector[:2]: # Only take the first two string dimensions
combined_fp.extend(self.hasher.fingerprint(dimension))
# Incorporate the integer dimensions by modifying the bucket key
num_variables = vector[2]
num_objectives = vector[3]
# Store the combined fingerprint
self.fingerprints[key] = (combined_fp, num_variables, num_objectives)
# Divide the fingerprint into bands and store in buckets with the integers as part of the key
for band_idx in range(self.num_bands):
start = band_idx * self.band_width
end = start + self.band_width
band_fp = (tuple(combined_fp[start:end]), num_variables, num_objectives)
self.buckets[band_idx][band_fp].add(key)
def query(self, vector):
"""
Query similar vectors in the cache.
Parameters:
-----------
vector: tuple of (str, str, int, int)
The multidimensional vector to find similar items to. The format is:
(task_name, variable_names, num_variables, num_objectives)
Returns:
--------
set
A set of keys of similar vectors.
"""
if vector is None:
return set()
similar_items = set()
combined_fp = []
for dimension in vector[:2]: # Only take the first two string dimensions
combined_fp.extend(self.hasher.fingerprint(dimension))
num_variables = vector[2]
num_objectives = vector[3]
# Check for similarity across all bands
for band_idx in range(self.num_bands):
start = band_idx * self.band_width
end = start + self.band_width
band_fp = (tuple(combined_fp[start:end]), num_variables, num_objectives)
if band_fp in self.buckets[band_idx]:
similar_items.update(self.buckets[band_idx][band_fp])
return similar_items
if __name__ == "__main__":
# Example usage assuming MinHasher class is defined and imported correctly.
hasher = MinHasher(num_hashes=200, char_ngram=2, random_state=42)
lsh_cache = LSHCache(hasher, num_bands=10)
lsh_cache.add("doc1", ("parameters1", "objectives1", 10, 5))
lsh_cache.add("doc2", ("parameters2", "objectives2", 10, 5))
print(lsh_cache.query(("parameters2", "objectives2", 10, 5)))
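The banding scheme above trades precision for recall: two items whose minhash signatures agree on a fraction `s` of positions collide in at least one of `b` bands of width `r` with probability `1 - (1 - s^r)^b`. A small pure-Python sketch of that relationship (the numbers are illustrative, not taken from the module):

```python
def collision_probability(s, num_bands, band_width):
    """Chance that two minhash signatures with per-position agreement
    probability s share at least one full band (the LSH match rule)."""
    return 1.0 - (1.0 - s ** band_width) ** num_bands

# With 200 hashes split into 10 bands of width 20, as in the example above:
p_similar = collision_probability(0.9, num_bands=10, band_width=20)
p_dissimilar = collision_probability(0.3, num_bands=10, band_width=20)
assert p_similar > 0.7          # near-duplicates almost always collide
assert p_dissimilar < 0.01      # dissimilar items almost never do
```

Increasing `num_bands` (narrower bands) shifts the threshold toward recall; fewer, wider bands make matching stricter.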
================================================
FILE: transopt/datamanager/manager.py
================================================
# import cProfile
# import pstats
from transopt.datamanager.database import Database
from transopt.datamanager.lsh import LSHCache
from transopt.datamanager.minhash import MinHasher
from transopt.utils.log import logger
class DataManager:
_instance = None
_initialized = False  # ensures the initialization logic runs only once
def __new__(cls, *args, **kwargs):
if cls._instance is None:
cls._instance = super(DataManager, cls).__new__(cls)
cls._instance._initialized = False
return cls._instance
def __init__(
self, db=None, num_hashes=100, char_ngram=5, num_bands=25, random_state=12345
):
if not self._initialized:
if db is None:
self.db = Database()
else:
self.db = db
self._initialize_lsh_cache(num_hashes, char_ngram, num_bands, random_state)
self._initialized = True
def _initialize_lsh_cache(self, num_hashes, char_ngram, num_bands, random_state):
hasher = MinHasher(
num_hashes=num_hashes, char_ngram=char_ngram, random_state=random_state
)
self.lsh_cache = LSHCache(hasher, num_bands=num_bands)
datasets = self.db.get_experiment_datasets()
for dataset in datasets:
dataset_info = self.db.query_dataset_info(dataset)
self._add_lsh_vector(dataset, dataset_info)
def _add_lsh_vector(self, dataset_name, dataset_info):
vector = self._construct_vector(dataset_info)
self.lsh_cache.add(dataset_name, vector)
def _construct_vector(self, dataset_info):
try:
num_variables = dataset_info.get("num_variables", len(dataset_info["variables"]))
num_objectives = dataset_info.get("num_objectives", len(dataset_info["objectives"]))
variables = dataset_info["variables"]
variable_names = " ".join([var["name"] for var in variables])
task_name = dataset_info["additional_config"]['problem_name']
return (task_name, variable_names, num_variables, num_objectives)
except KeyError:
logger.error(
f"""
Dataset does not have the required information.
(num_variables, num_objectives, variables)
"""
)
return None
def search_similar_datasets(self, problem_config):
vector = self._construct_vector(problem_config)
similar_datasets = self.lsh_cache.query(vector)
return similar_datasets
def search_datasets_by_name(self, dataset_name):
all_tables = self.db.get_all_datasets()
matching_tables = [
table for table in all_tables if dataset_name.lower() in table.lower()
]
return matching_tables
def get_dataset_info(self, dataset_name):
return self.db.query_dataset_info(dataset_name)
def get_experiment_datasets(self):
return self.db.get_experiment_datasets()
def get_all_datasets(self):
return self.db.get_all_datasets()
def create_dataset(self, dataset_name, dataset_info, overwrite=True):
self.db.create_table(dataset_name, dataset_info, overwrite)
dataset_info_extended = self.db.query_dataset_info(dataset_name)
self._add_lsh_vector(dataset_name, dataset_info_extended)
def insert_data(self, dataset_name, data):
return self.db.insert_data(dataset_name, data)
def remove_dataset(self, dataset_name):
return self.db.remove_table(dataset_name)
def teardown(self):
self._instance = None
self._initialized = False
self.db.close()
def main():
dm = DataManager(num_hashes=200, char_ngram=5, num_bands=100)
dataset = dm.db.get_table_list()[0]
test_query = dm.db.query_dataset_info(dataset)
sd = dm.search_similar_datasets(test_query)
print(dm.db.get_table_list()[:2])
dm.teardown()
if __name__ == "__main__":
pass
# profiler = cProfile.Profile()
# profiler.run("main()")
# stats = pstats.Stats(profiler)
# stats.strip_dirs().sort_stats("time").print_stats(10)
================================================
FILE: transopt/datamanager/minhash.py
================================================
from concurrent.futures import ThreadPoolExecutor
import mmh3
import numpy as np
class MinHasher:
def __init__(self, num_hashes, char_ngram, random_state=None):
"""
Parameters:
-----------
num_hashes: int
The number of hash functions to use. A minhash is computed for each
hash function derived from different random seeds.
char_ngram: int
The number of consecutive characters to include in a sliding window
when creating the document shingles.
random_state: None, int, np.random.RandomState
A random state to initialise the random number generator with.
"""
self.num_hashes = num_hashes
self.char_ngram = char_ngram
random_state = np.random.RandomState(random_state)
self._seeds = random_state.randint(0, 1_000_000, size=num_hashes)
@property
def num_seeds(self):
return len(self._seeds)
def get_shingles(self, text):
"""Extract character-based shingles from text."""
return set(
text[i : i + self.char_ngram]
for i in range(len(text) - self.char_ngram + 1)
)
def fingerprint(self, text):
shingles = self.get_shingles(text)
minhashes = [float("inf")] * self.num_hashes
for shingle in shingles:
# Ensure the input is in bytes for mmh3
encoded_shingle = shingle.encode("utf-8")
for i, seed in enumerate(self._seeds):
hash_val = mmh3.hash(encoded_shingle, int(seed)) % (2**32)
if hash_val < minhashes[i]:
minhashes[i] = hash_val
return minhashes
def estimate_similarity(self, fp1, fp2):
return sum(1 for x, y in zip(fp1, fp2) if x == y) / self.num_hashes
def jaccard_similarity(set1, set2):
if not isinstance(set1, set):
set1 = set(set1)
if not isinstance(set2, set):
set2 = set(set2)
return len(set1.intersection(set2)) / len(set1.union(set2))
if __name__ == "__main__":
text1 = "Lorem Ipsum dolor sit ametsdaasdsad"
text2 = "Lorem Ipsum dolor sit amet is how dummy text starts"
# Create a MinHasher instance
hasher = MinHasher(num_hashes=100, char_ngram=2, random_state=12345)
# Compute shingles for both texts
shingles1 = hasher.get_shingles(text1)
shingles2 = hasher.get_shingles(text2)
# Compute MinHashes for both texts
fp1 = hasher.fingerprint(text1)
fp2 = hasher.fingerprint(text2)
# Comparing MinHash signatures to estimate similarity
estimated_similarity = hasher.estimate_similarity(fp1, fp2)
print(f"Estimated similarity: {estimated_similarity:.4f}")
print(
f"Jaccard similarity: {jaccard_similarity(hasher.get_shingles(text1), hasher.get_shingles(text2)):.4f}"
)
================================================
FILE: transopt/optimizer/MultiObjOptimizer/CauMOpt.py
================================================
import numpy as np
import GPy
from typing import Dict, Union, List
from transopt.optimizer.optimizer_base import BOBase
from agent.registry import optimizer_register
from transopt.utils.Normalization import get_normalizer
from transopt.utils.serialization import ndarray_to_vectors,vectors_to_ndarray
from sklearn.ensemble import ExtraTreesRegressor
def calculate_gini_index(labels):
_, counts = np.unique(labels, return_counts=True)
probabilities = counts / counts.sum()
gini = 1 - sum(probabilities ** 2)
return gini
def features_by_gini(data, labels):
features_gini = []
# Iterate over each feature
for feature_idx in range(data.shape[1]):
current_feature_values = data[:, feature_idx]
# Candidate splits: evaluate the Gini index at each unique value
gini_indexes = []
for split_value in np.unique(current_feature_values):
left_split = labels[current_feature_values <= split_value]
right_split = labels[current_feature_values > split_value]
# Compute the weighted Gini index of the left and right splits
left_gini = calculate_gini_index(left_split)
right_gini = calculate_gini_index(right_split)
weighted_gini = (len(left_split) * left_gini + len(right_split) * right_gini) / len(labels)
gini_indexes.append(weighted_gini)
# Keep the minimum Gini index achieved by this feature
min_gini = min(gini_indexes) if gini_indexes else 1  # guard against a feature whose values are all identical
features_gini.append((feature_idx, min_gini))
return features_gini
@optimizer_register("CauMO")
class CauMO(BOBase):
def __init__(self, config: Dict, rate_oversampling = 4, seed = 0, **kwargs):
super(CauMO, self).__init__(config=config)
self.init_method = "Random"
self.verbose = config.get("verbose", True)
self.pop_size = config.get("pop_size", 10)
self.ini_num = self.pop_size
self.second_space = None
self.third_space = None
self.model = []
self.acf = "CauMOACF"
self.rate_oversampling = rate_oversampling
self.num_duplicates = int(rate_oversampling * 4.0)
self.seed = seed
def initial_sample(self):
return self.random_sample(self.ini_num)
def random_sample(self, num_samples: int) -> List[Dict]:
"""
Initialize random samples.
:param num_samples: Number of random samples to generate
:return: List of dictionaries, each representing a random sample
"""
if self.input_dim is None:
raise ValueError(
"Input dimension is not set. Call set_search_space() to set the input dimension."
)
random_samples = []
for _ in range(num_samples):
sample = {}
for var_info in self.search_space.config_space:
var_name = var_info["name"]
var_domain = var_info["domain"]
# Generate a random floating-point number within the specified range
random_value = np.random.uniform(var_domain[0], var_domain[1])
sample[var_name] = random_value
random_samples.append(sample)
random_samples = self.inverse_transform(random_samples)
return random_samples
def update_model(self, Data):
Target_Data = Data["Target"]
assert "X" in Target_Data
X = Target_Data["X"]
Y = Target_Data["Y"]
assert Y.shape[0] == self.num_objective
if self.normalizer is not None:
Y_norm = np.array([self.normalizer(y) for y in Y])
else:
Y_norm = Y
if len(self.model_list) == 0:
self.create_model(X, Y_norm)
else:
# self.set_data(X, Y_norm)
self.fit_data(X, Y_norm)
# try:
# for i in range(len(self.model_list)):
# self.model_list[i].optimize_restarts(
# num_restarts=1, verbose=self.verbose, robust=True
# )
# except np.linalg.linalg.LinAlgError as e:
# # break
# print("Error: np.linalg.linalg.LinAlgError")
self.Y_Norm = None
def create_model(self, X, Y):
assert self.num_objective is not None
compile_time_model = ExtraTreesRegressor(
n_estimators=200,
max_features='sqrt',
bootstrap=True,
random_state=self.seed,
max_samples = self.rate_oversampling / self.num_duplicates,
)
compile_time_model.fit(X, Y[2][:, np.newaxis])
# Kc = GPy.kern.RBF(input_dim=self.input_dim)
# compile_time_model = GPy.models.GPRegression(X, Y[2][:, np.newaxis], kernel=Kc, normalizer=None)
file_size_feature_rank = features_by_gini(X, Y[1])
self.file_size_rep_feature = sorted(file_size_feature_rank, key=lambda x: x[1])[0][0]
X_file = X.copy()
X_file[:, self.file_size_rep_feature] = np.clip(2 * (Y[2] - (-3)) / 6 - 1, -1, 1)
file_size_model = ExtraTreesRegressor(
n_estimators=200,
max_features='sqrt',
bootstrap=True,
random_state=self.seed,
max_samples = self.rate_oversampling / self.num_duplicates,
)
file_size_model.fit(X_file, Y[1][:, np.newaxis])
# Kf = GPy.kern.RBF(input_dim=self.input_dim)
# file_size_model = GPy.models.GPRegression(X_file, Y[1][:, np.newaxis], kernel=Kf, normalizer=None)
run_time_feature_rank = features_by_gini(X, Y[0])
run_time_feature_rank = sorted(run_time_feature_rank, key=lambda x: x[1])
self.st_run_time_rep_feature = run_time_feature_rank[0][0]
self.nd_run_time_rep_feature = run_time_feature_rank[1][0]
X_rtime = X.copy()
X_rtime[:, self.st_run_time_rep_feature] = np.clip(2 * (Y[2] - (-3)) / 6 - 1, -1, 1)
X_rtime[:, self.nd_run_time_rep_feature] = np.clip(2 * (Y[1] - (-3)) / 6 - 1, -1, 1)
# Kr = GPy.kern.RBF(input_dim=self.input_dim)
# running_time_model = GPy.models.GPRegression(X_rtime, Y[0][:, np.newaxis], kernel=Kr, normalizer=None)
running_time_model = ExtraTreesRegressor(
n_estimators=200,
max_features='sqrt',
bootstrap=True,
random_state=self.seed,
max_samples = self.rate_oversampling / self.num_duplicates,
)
running_time_model.fit(X_rtime, Y[0][:, np.newaxis])
# compile_time_model['.*Gaussian_noise.variance'].constrain_fixed(1.0e-4)
# compile_time_model['.*rbf.variance'].constrain_fixed(1.0)
# file_size_model['.*Gaussian_noise.variance'].constrain_fixed(1.0e-4)
# file_size_model['.*rbf.variance'].constrain_fixed(1.0)
# running_time_model['.*Gaussian_noise.variance'].constrain_fixed(1.0e-4)
# running_time_model['.*rbf.variance'].constrain_fixed(1.0)
self.model_list.append(compile_time_model)
self.model_list.append(file_size_model)
self.model_list.append(running_time_model)
def set_data(self, X, Y):
self.model_list[0].set_XY(X, Y[2][:, np.newaxis])
file_size_feature_rank = features_by_gini(X, Y[1])
self.file_size_rep_feature = sorted(file_size_feature_rank, key=lambda x: x[1])[0][0]
X_file = X.copy()
X_file[:, self.file_size_rep_feature] = np.clip(2 * (Y[2] - (-3)) / 6 - 1, -1, 1)  # condition on compile time (Y[2]), matching create_model and predict
self.model_list[1].set_XY(X_file, Y[1][:, np.newaxis])
run_time_feature_rank = features_by_gini(X, Y[0])
run_time_feature_rank = sorted(run_time_feature_rank, key=lambda x: x[1])
self.st_run_time_rep_feature = run_time_feature_rank[0][0]
self.nd_run_time_rep_feature = run_time_feature_rank[1][0]
X_rtime = X.copy()
X_rtime[:, self.st_run_time_rep_feature] = np.clip(2 * (Y[2] - (-3)) / 6 - 1, -1, 1)
X_rtime[:, self.nd_run_time_rep_feature] = np.clip(2 * (Y[1] - (-3)) / 6 - 1, -1, 1)
self.model_list[2].set_XY(X_rtime, Y[0][:, np.newaxis])
def fit_data(self, X, Y):
self.model_list[0].fit(X, Y[2][:, np.newaxis])
file_size_feature_rank = features_by_gini(X, Y[1])
self.file_size_rep_feature = sorted(file_size_feature_rank, key=lambda x: x[1])[0][0]
X_file = X.copy()
X_file[:, self.file_size_rep_feature] = np.clip(2 * (Y[2] - (-3)) / 6 - 1, -1, 1)  # condition on compile time (Y[2]), matching create_model and predict
self.model_list[1].fit(X_file, Y[1][:, np.newaxis])
run_time_feature_rank = features_by_gini(X, Y[0])
run_time_feature_rank = sorted(run_time_feature_rank, key=lambda x: x[1])
self.st_run_time_rep_feature = run_time_feature_rank[0][0]
self.nd_run_time_rep_feature = run_time_feature_rank[1][0]
X_rtime = X.copy()
X_rtime[:, self.st_run_time_rep_feature] = np.clip(2 * (Y[2] - (-3)) / 6 - 1, -1, 1)
X_rtime[:, self.nd_run_time_rep_feature] = np.clip(2 * (Y[1] - (-3)) / 6 - 1, -1, 1)
self.model_list[2].fit(X_rtime, Y[0][:, np.newaxis])
def suggest(self, n_suggestions: Union[None, int] = None) -> List[Dict]:
if self._X.size == 0:
suggests = self.initial_sample()
return suggests
elif self._X.shape[0] < self.ini_num:
pass
else:
if "normalize" in self.config:
self.normalizer = get_normalizer(self.config["normalize"])
Data = {"Target": {"X": self._X, "Y": self._Y}}
self.update_model(Data)
suggested_sample, acq_value = self.evaluator.compute_batch(
None, context_manager=None
)
suggested_sample = self.search_space.zip_inputs(suggested_sample)
suggested_sample = ndarray_to_vectors(
self._get_var_name("search"), suggested_sample
)
design_suggested_sample = self.inverse_transform(suggested_sample)
return design_suggested_sample
def observe(self, input_vectors: Union[List[Dict], Dict], output_value: Union[List[Dict], Dict]) -> None:
super().observe(input_vectors, output_value)
if "normalize" in self.config:
self.normalizer = get_normalizer(self.config["normalize"])
self.Y_Norm = np.array([self.normalizer(y) for y in self._Y])
def predict(self, X, full_cov=False):
pred_mean = np.zeros((X.shape[0], 0))
if full_cov:
pred_var = np.zeros((0, X.shape[0], X.shape[0]))
else:
pred_var = np.zeros((X.shape[0], 0))
# compile_time_mean, compile_time_var = self.model_list[0].predict(X, full_cov=full_cov)
compile_time_mean, compile_time_var = self.raw_predict(X, self.model_list[0])
X_file = X.copy()
X_file[:, self.file_size_rep_feature] = np.clip(2 * (compile_time_mean[:, 0] - (-3)) / 6 - 1, -1, 1)
file_size_mean, file_size_var = self.raw_predict(X_file, self.model_list[1])
X_run = X.copy()
X_run[:, self.st_run_time_rep_feature] = np.clip(2 * (compile_time_mean[:, 0] - (-3)) / 6 - 1, -1, 1)
X_run[:, self.nd_run_time_rep_feature] = np.clip(2 * (file_size_mean[:, 0] - (-3)) / 6 - 1, -1, 1)
run_time_mean, run_time_var = self.raw_predict(X_run, self.model_list[2])
pred_mean = np.hstack((pred_mean, run_time_mean, file_size_mean, compile_time_mean))
if full_cov:
pred_var = np.hstack((pred_var, run_time_var, file_size_var, compile_time_var))
else:
pred_var = np.hstack((pred_var, run_time_var, file_size_var, compile_time_var))
# pred_mean = np.append(pred_mean, run_time_mean)
# pred_mean = np.append(pred_mean, file_size_mean)
# pred_mean = np.append(pred_mean, compile_time_mean)
# pred_var = np.append(pred_var, run_time_var)
# pred_var = np.append(pred_var, file_size_var)
# pred_var = np.append(pred_var, compile_time_var)
return pred_mean, pred_var
def raw_predict(self, X, model):
_X_test = X.copy()
mu = model.predict(_X_test)
cov = self.raw_predict_var(_X_test, model, mu)
return mu[:,np.newaxis], cov[:,np.newaxis]
def raw_predict_var(self, X, trees, predictions, min_variance=0.1):
std = np.zeros(len(X))
for tree in trees:
var_tree = tree.tree_.impurity[tree.apply(X)]
# This rounding off is done in accordance with the
# adjustment done in section 4.3.3
# of http://arxiv.org/pdf/1211.0906v2.pdf to account
# for cases such as leaves with 1 sample in which there
# is zero variance.
var_tree[var_tree < min_variance] = min_variance
mean_tree = tree.predict(X)
std += var_tree + mean_tree ** 2
std /= len(trees)
std -= predictions ** 2.0
std[std < 0.0] = 0.0
std = std ** 0.5
return std
def model_reset(self):
self.model_list = []
self.kernel_list = []
def get_fmin(self):
"Get the minimum of the current model."
m, v = self.predict(self._X)
return m.min()
def get_fmin_by_id(self, idx):
"Get the minimum of the current model."
m, v = self.predict_by_id(self._X, idx)
return m.min()
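The Gini machinery at the top of this file drives CauMO's choice of "representative" features. A self-contained pure-Python sketch of the same impurity computation and split scoring, on toy data (not TransOPT's arrays):

```python
from collections import Counter

def gini(labels):
    # Gini impurity: 1 minus the sum of squared class probabilities.
    n = len(labels)
    return 1.0 - sum((c / n) ** 2 for c in Counter(labels).values())

# A pure group has zero impurity; an even two-class split has the maximum 0.5.
assert gini([1, 1, 1, 1]) == 0.0
assert gini([0, 0, 1, 1]) == 0.5

def weighted_gini(values, labels, t):
    # Score a candidate split at threshold t, as features_by_gini does:
    # impurities of the two sides weighted by their sizes.
    left = [y for x, y in zip(values, labels) if x <= t]
    right = [y for x, y in zip(values, labels) if x > t]
    return (len(left) * gini(left) + len(right) * gini(right)) / len(labels)

# A feature that separates the labels perfectly at t=2 scores 0:
assert weighted_gini([1, 2, 3, 4], [0, 0, 1, 1], 2) == 0.0
```

`features_by_gini` simply takes, for each feature, the minimum of this score over all candidate thresholds; the feature with the lowest minimum is the most label-informative one.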
================================================
FILE: transopt/optimizer/MultiObjOptimizer/IEIPV.py
================================================
================================================
FILE: transopt/optimizer/MultiObjOptimizer/MoeadEGO.py
================================================
import GPy, GPyOpt
import numpy as np
from typing import Dict, Union, List
from transopt.optimizer.optimizer_base import BOBase
from transopt.utils.serialization import ndarray_to_vectors
from agent.registry import optimizer_register
from transopt.utils.Normalization import get_normalizer
from transopt.utils.weights import init_weight, tchebycheff
# from utils.common import findKBest
# from revision.multiobjective_bayesian_optimization import MultiObjectiveBayesianOptimization
# from revision.weighted_gpmodel import WeightedGPModel
# from revision.multiobjective_EI import MultiObjectiveAcquisitionEI
@optimizer_register("MoeadEGO")
class MoeadEGO(BOBase):
def __init__(self, config: Dict, **kwargs):
super(MoeadEGO, self).__init__(config=config)
self.init_method = "Random"
self.verbose = config.get("verbose", True)
self.n_weight = config.get("n_weight", 10)
self.pop_size = config.get("pop_size", self.n_weight)
if self.pop_size > self.n_weight:
self.pop_size = self.n_weight
self.ini_num = self.pop_size
self.model = []
self.acf = "MOEADEGO"
self.weight = None
def initial_sample(self):
return self.random_sample(self.ini_num)
def random_sample(self, num_samples: int) -> List[Dict]:
"""
Initialize random samples.
:param num_samples: Number of random samples to generate
:return: List of dictionaries, each representing a random sample
"""
if self.input_dim is None:
raise ValueError(
"Input dimension is not set. Call set_search_space() to set the input dimension."
)
random_samples = []
for _ in range(num_samples):
sample = {}
for var_info in self.search_space.config_space:
var_name = var_info["name"]
var_domain = var_info["domain"]
# Generate a random floating-point number within the specified range
random_value = np.random.uniform(var_domain[0], var_domain[1])
sample[var_name] = random_value
random_samples.append(sample)
random_samples = self.inverse_transform(random_samples)
return random_samples
def update_model(self, Data):
Target_Data = Data["Target"]
assert "X" in Target_Data
X = Target_Data["X"]
Y = Target_Data["Y"]
assert Y.shape[0] == self.num_objective
if self.normalizer is not None:
Y_norm = np.array([self.normalizer(y) for y in Y])
else:
Y_norm = Y
if len(self.model_list) == 0:
self.create_model(X, Y_norm)
else:
ideal_point = np.min(Y_norm.T, axis=0)
for i in range(len(self.model_list)):
Y_weighted = tchebycheff(Y.T, self.weight[i], ideal=ideal_point)
self.model_list[i].set_XY(X, Y_weighted)
try:
for i in range(len(self.model_list)):
self.model_list[i].optimize_restarts(
num_restarts=1, verbose=self.verbose, robust=True
)
except np.linalg.LinAlgError:
print("Error: np.linalg.LinAlgError")
def create_model(self, X, Y):
assert self.num_objective is not None
ideal_point = np.min(Y.T, axis=0)
self.weight = init_weight(self.num_objective, self.n_weight)
self.n_weight = self.weight.shape[0]
for i in range(self.n_weight):
kernel = GPy.kern.RBF(input_dim = self.input_dim)
Y_weighted = tchebycheff(Y.T, self.weight[i], ideal=ideal_point)
model = GPy.models.GPRegression(X, Y_weighted, kernel=kernel, normalizer=None)
model['.*Gaussian_noise.variance'].constrain_fixed(1.0e-4)
model['.*rbf.variance'].constrain_fixed(1.0)
self.kernel_list.append(model.kern)
self.model_list.append(model)
def suggest(self, n_suggestions: Union[None, int] = None) -> List[Dict]:
if self._X.size == 0:
suggests = self.initial_sample()
return suggests
elif self._X.shape[0] < self.ini_num:
pass
else:
if "normalize" in self.config:
self.normalizer = get_normalizer(self.config["normalize"])
Data = {"Target": {"X": self._X, "Y": self._Y}}
self.update_model(Data)
suggested_sample, acq_value = self.evaluator.compute_batch(
None, context_manager=None
)
suggested_sample = self.search_space.zip_inputs(suggested_sample)
suggested_sample = ndarray_to_vectors(
self._get_var_name("search"), suggested_sample
)
design_suggested_sample = self.inverse_transform(suggested_sample)
return design_suggested_sample
def predict(self, X, full_cov=False):
# X_copy = np.array([X])
pred_mean = np.zeros((X.shape[0], 0))
if full_cov:
pred_var = np.zeros((0, X.shape[0], X.shape[0]))
else:
pred_var = np.zeros((X.shape[0], 0))
for model in self.model_list:
mean, var = model.predict(X, full_cov=full_cov)
pred_mean = np.append(pred_mean, mean, axis=1)
if full_cov:
pred_var = np.append(pred_var, [var], axis=0)
else:
pred_var = np.append(pred_var, var, axis=1)
return pred_mean, pred_var
def predict_by_id(self, X, idx, full_cov=False):
pred_mean = np.zeros((X.shape[0], 0))
if full_cov:
pred_var = np.zeros((0, X.shape[0], X.shape[0]))
else:
pred_var = np.zeros((X.shape[0], 0))
mean, var = self.model_list[idx].predict(X, full_cov=full_cov)
pred_mean = np.append(pred_mean, mean, axis=1)
if full_cov:
pred_var = np.append(pred_var, [var], axis=0)
else:
pred_var = np.append(pred_var, var, axis=1)
return pred_mean, pred_var
def model_reset(self):
self.model_list = []
self.kernel_list = []
def get_fmin(self):
"Get the minimum of the current model."
m, v = self.predict(self._X)
return m.min()
def get_fmin_by_id(self, idx):
"Get the minimum of the current model."
m, v = self.predict_by_id(self._X, idx)
return m.min()
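MOEA/D-EGO turns each weight vector's subproblem into a single objective via the Tchebycheff function. The actual helper lives in `transopt.utils.weights`; the sketch below assumes the standard form `max_i w_i |f_i - z_i*|` with ideal point `z*`:

```python
def tchebycheff(f, w, ideal):
    # Standard Tchebycheff scalarization: the worst weighted deviation
    # from the ideal point across all objectives.
    return max(wi * abs(fi - zi) for fi, wi, zi in zip(f, w, ideal))

ideal = [0.0, 0.0]
# Two candidate objective vectors under the balanced weight (0.5, 0.5):
assert tchebycheff([2.0, 1.0], [0.5, 0.5], ideal) == 1.0
assert tchebycheff([1.0, 1.0], [0.5, 0.5], ideal) == 0.5
# Skewed weights emphasize one objective, steering which solutions win:
assert tchebycheff([2.0, 1.0], [0.1, 0.9], ideal) == 0.9
```

One GP per weight vector (as `create_model` builds above) then lets a single-objective acquisition function search each scalarized subproblem independently.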
================================================
FILE: transopt/optimizer/MultiObjOptimizer/ParEGO.py
================================================
import numpy as np
import GPy
from typing import Dict, Union, List
from transopt.optimizer.optimizer_base import BOBase
from transopt.utils.serialization import ndarray_to_vectors
from agent.registry import optimizer_register
from transopt.utils.Normalization import get_normalizer
@optimizer_register("ParEGO")
class ParEGO(BOBase):
def __init__(self, config: Dict, **kwargs):
super(ParEGO, self).__init__(config=config)
self.init_method = "Random"
if "verbose" in config:
self.verbose = config["verbose"]
else:
self.verbose = True
if "init_number" in config:
self.ini_num = config["init_number"]
else:
self.ini_num = None
self.acf = "EI"
self.rho = 0.1
def scalarization(self, Y: np.ndarray, rho):
"""
scalarize observed output data
"""
theta = np.random.random_sample(Y.shape[0])
sum_theta = np.sum(theta)
theta = theta / sum_theta
theta_f = Y.T * theta
max_k = np.max(theta_f, axis=1)
rho_sum_theta_f = rho * np.sum(theta_f, axis=1)
return max_k + rho_sum_theta_f
def initial_sample(self):
return self.random_sample(self.ini_num)
def suggest(self, n_suggestions: Union[None, int] = None) -> List[Dict]:
if self._X.size == 0:
suggests = self.initial_sample()
return suggests
elif self._X.shape[0] < self.ini_num:
pass
else:
if "normalize" in self.config:
self.normalizer = get_normalizer(self.config["normalize"])
Data = {"Target": {"X": self._X, "Y": self._Y}}
self.update_model(Data)
suggested_sample, acq_value = self.evaluator.compute_batch(
None, context_manager=None
)
suggested_sample = self.search_space.zip_inputs(suggested_sample)
suggested_sample = ndarray_to_vectors(
self._get_var_name("search"), suggested_sample
)
design_suggested_sample = self.inverse_transform(suggested_sample)
return design_suggested_sample
def update_model(self, Data):
Target_Data = Data["Target"]
assert "X" in Target_Data
X = Target_Data["X"]
Y = Target_Data["Y"]
assert Y.shape[0] == self.num_objective
if self.normalizer is not None:
Y_norm = np.array([self.normalizer(y) for y in Y])
else:
Y_norm = Y
Y_scalar = self.scalarization(Y_norm, 0.1)[:, np.newaxis]
if len(self.model_list) == 0:
self.create_model(X, Y_scalar)
else:
self.model_list[0].set_XY(X, Y_scalar)
try:
self.model_list[0].optimize_restarts(
num_restarts=1, verbose=self.verbose, robust=True
)
except np.linalg.LinAlgError:
print("Error: np.linalg.LinAlgError")
def create_model(self, X, Y):
assert self.num_objective is not None
kernel = GPy.kern.RBF(input_dim=self.input_dim)
model = GPy.models.GPRegression(X, Y, kernel=kernel, normalizer=None)
model[".*Gaussian_noise.variance"].constrain_fixed(1.0e-4)
model[".*rbf.variance"].constrain_fixed(1.0)
self.kernel_list.append(model.kern)
self.model_list.append(model)
print("model state")
for i, model in enumerate(self.model_list):
print("--------model for {}th object--------".format(i))
print(model)
def predict(self, X, full_cov=False):
# X_copy = np.array([X])
pred_mean = np.zeros((X.shape[0], 0))
if full_cov:
pred_var = np.zeros((0, X.shape[0], X.shape[0]))
else:
pred_var = np.zeros((X.shape[0], 0))
for model in self.model_list:
mean, var = model.predict(X, full_cov=full_cov)
pred_mean = np.append(pred_mean, mean, axis=1)
if full_cov:
pred_var = np.append(pred_var, [var], axis=0)
else:
pred_var = np.append(pred_var, var, axis=1)
return pred_mean, pred_var
def random_sample(self, num_samples: int) -> List[Dict]:
"""
Initialize random samples.
:param num_samples: Number of random samples to generate
:return: List of dictionaries, each representing a random sample
"""
if self.input_dim is None:
raise ValueError(
"Input dimension is not set. Call set_search_space() to set the input dimension."
)
random_samples = []
for _ in range(num_samples):
sample = {}
for var_info in self.search_space.config_space:
var_name = var_info["name"]
var_domain = var_info["domain"]
# Generate a random floating-point number within the specified range
random_value = np.random.uniform(var_domain[0], var_domain[1])
sample[var_name] = random_value
random_samples.append(sample)
random_samples = self.inverse_transform(random_samples)
return random_samples
def model_reset(self):
self.model_list = []
self.kernel_list = []
def get_fmin(self):
"Get the minimum of the current model."
m, v = self.predict(self._X)
return m.min()
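ParEGO's `scalarization` above implements the augmented Tchebycheff form `max_i(θ_i f_i) + ρ Σ_i θ_i f_i` with weights θ drawn at random each iteration. A deterministic sketch with fixed weights (the class itself renormalizes a random draw to sum to 1):

```python
def parego_scalarize(F, theta, rho=0.1):
    # Augmented Tchebycheff: the max over weighted objectives plus a small
    # rho-weighted sum, computed row-wise for each observation in F.
    out = []
    for f in F:
        wf = [t * v for t, v in zip(theta, f)]
        out.append(max(wf) + rho * sum(wf))
    return out

theta = [0.5, 0.5]
scalars = parego_scalarize([[2.0, 1.0], [1.0, 1.0]], theta, rho=0.1)
assert abs(scalars[0] - 1.15) < 1e-9   # max(1.0, 0.5) + 0.1 * 1.5
assert abs(scalars[1] - 0.6) < 1e-9    # max(0.5, 0.5) + 0.1 * 1.0
```

The `rho` term breaks ties among points with the same worst weighted objective, keeping the scalarized landscape strictly Pareto-compliant; a single GP fit to the scalarized values then supports a standard EI acquisition, as in `update_model`.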
================================================
FILE: transopt/optimizer/MultiObjOptimizer/SMSEGO.py
================================================
import numpy as np
import GPy
from typing import Dict, Union, List
from transopt.optimizer.optimizer_base import BOBase
from transopt.utils.serialization import ndarray_to_vectors
from agent.registry import optimizer_register
from transopt.utils.Normalization import get_normalizer
@optimizer_register('SMSEGO')
class SMSEGO(BOBase):
def __init__(self, config:Dict, **kwargs):
super(SMSEGO, self).__init__(config=config)
self.init_method = 'Random'
if 'verbose' in config:
self.verbose = config['verbose']
else:
self.verbose = True
if 'init_number' in config:
self.ini_num = config['init_number']
else:
self.ini_num = None
self.acf = 'SMSEGO'
def initial_sample(self):
return self.random_sample(self.ini_num)
def suggest(self, n_suggestions:Union[None, int] = None) ->List[Dict]:
if self._X.size == 0:
suggests = self.initial_sample()
return suggests
elif self._X.shape[0] < self.ini_num:
pass
else:
if 'normalize' in self.config:
self.normalizer = get_normalizer(self.config['normalize'])
Data = {'Target':{'X':self._X, 'Y':self._Y}}
self.update_model(Data)
suggested_sample, acq_value = self.evaluator.compute_batch(None, context_manager=None)
suggested_sample = self.search_space.zip_inputs(suggested_sample)
suggested_sample = ndarray_to_vectors(self._get_var_name('search'), suggested_sample)
design_suggested_sample = self.inverse_transform(suggested_sample)
return design_suggested_sample
def update_model(self, Data):
Target_Data = Data['Target']
assert 'X' in Target_Data
X = Target_Data['X']
Y = Target_Data['Y']
assert Y.shape[0] == self.num_objective
if self.normalizer is not None:
Y_norm = np.array([self.normalizer(y) for y in Y])
else:
Y_norm = Y
if len(self.model_list) == 0:
self.create_model(X, Y_norm)
else:
for i in range(self.num_objective):
self.model_list[i].set_XY(X, Y_norm[i].T[:, np.newaxis])
try:
for i in range(self.num_objective):
self.model_list[i].optimize_restarts(num_restarts=1, verbose=self.verbose, robust=True)
except np.linalg.LinAlgError:
# Keep the previous hyperparameters if optimization fails
print('Error: np.linalg.LinAlgError')
def create_model(self, X, Y):
assert self.num_objective is not None
assert self.num_objective == Y.shape[0]
for l in range(self.num_objective):
kernel = GPy.kern.RBF(input_dim = self.input_dim)
model = GPy.models.GPRegression(X, Y[l][:, np.newaxis], kernel=kernel, normalizer=None)
model['.*Gaussian_noise.variance'].constrain_fixed(1.0e-4)
model['.*rbf.variance'].constrain_fixed(1.0)
self.kernel_list.append(model.kern)
self.model_list.append(model)
print("model state")
for i, model in enumerate(self.model_list):
print("--------model for {}th object--------".format(i))
print(model)
def predict(self, X, full_cov=False):
# X_copy = np.array([X])
if len(X.shape) ==1 :
X = X[np.newaxis,:]
pred_mean = np.zeros((X.shape[0], 0))
if full_cov:
pred_var = np.zeros((0, X.shape[0], X.shape[0]))
else:
pred_var = np.zeros((X.shape[0], 0))
for model in self.model_list:
mean, var = model.predict(X, full_cov=full_cov)
pred_mean = np.append(pred_mean, mean, axis=1)
if full_cov:
pred_var = np.append(pred_var, [var], axis=0)
else:
pred_var = np.append(pred_var, var, axis=1)
return pred_mean, pred_var
def random_sample(self, num_samples: int) -> List[Dict]:
"""
Initialize random samples.
:param num_samples: Number of random samples to generate
:return: List of dictionaries, each representing a random sample
"""
if self.input_dim is None:
raise ValueError("Input dimension is not set. Call set_search_space() to set the input dimension.")
random_samples = []
for _ in range(num_samples):
sample = {}
for var_info in self.search_space.config_space:
var_name = var_info['name']
var_domain = var_info['domain']
# Generate a random floating-point number within the specified range
random_value = np.random.uniform(var_domain[0], var_domain[1])
sample[var_name] = random_value
random_samples.append(sample)
random_samples = self.inverse_transform(random_samples)
return random_samples
def model_reset(self):
self.model_list = []
self.kernel_list = []
def get_fmin(self):
"Get the minimum of the current model."
pass
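`SMSEGO.predict` above fits one independent GP per objective and column-stacks their means and variances. The stacking pattern can be shown without GPy; `ConstantModel` is a hypothetical stand-in for a fitted per-objective model:

```python
import numpy as np

class ConstantModel:
    # Hypothetical stand-in for a fitted per-objective GP
    def __init__(self, mean, var):
        self.mean, self.var = mean, var

    def predict(self, X):
        n = X.shape[0]
        return np.full((n, 1), self.mean), np.full((n, 1), self.var)

def predict_multi(models, X):
    # Column-stack per-objective predictions into (n_points, n_objectives) arrays
    means = np.hstack([m.predict(X)[0] for m in models])
    variances = np.hstack([m.predict(X)[1] for m in models])
    return means, variances
```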
================================================
FILE: transopt/optimizer/MultiObjOptimizer/__init__.py
================================================
from transopt.optimizer.MultiObjOptimizer.ParEGO import ParEGO
from transopt.optimizer.MultiObjOptimizer.SMSEGO import SMSEGO
from transopt.optimizer.MultiObjOptimizer.CauMOpt import CauMO
from transopt.optimizer.MultiObjOptimizer.MoeadEGO import MoeadEGO
================================================
FILE: transopt/optimizer/SingleObjOptimizer/KrigingOptimizer.py
================================================
import GPy
import numpy as np
from pymoo.core.problem import Problem
from pymoo.algorithms.soo.nonconvex.ga import GA
from pymoo.algorithms.soo.nonconvex.de import DE
from pymoo.algorithms.soo.nonconvex.cmaes import CMAES
from pymoo.algorithms.soo.nonconvex.pso import PSO
from typing import Dict, Union, List
from transopt.optimizer.optimizer_base import BOBase
from transopt.utils.serialization import vectors_to_ndarray, output_to_ndarray
from transopt.utils.serialization import ndarray_to_vectors
from agent.registry import optimizer_register
from transopt.utils.Normalization import get_normalizer
@optimizer_register('KrigingEA')
class KrigingEA(BOBase):
def __init__(self, config: Dict, **kwargs):
super(KrigingEA, self).__init__(config=config)
self.init_method = 'latin'
self.model = None
self.ea = None
self.problem = None
if 'verbose' in config:
self.verbose = config['verbose']
else:
self.verbose = True
if 'init_number' in config:
self.ini_num = config['init_number']
else:
self.ini_num = None
if 'ea' in config:
self.ea_name = config['ea']
else:
self.ea_name = 'GA'
# model_manage: 'best' or 'pre-select' or 'generation'
if 'model_manage' in config:
self.model_manage = config['model_manage']
else:
self.model_manage = 'best'
# 'best':k best individual, 'pre-select' and 'generation': every k generation
if 'k' in config:
self.k = config['k']
else:
self.k = 1
self.pop = None
self.pop_num = self.ini_num
def initial_sample(self):
return self.sample(self.ini_num)
def suggest(self, n_suggestions: Union[None, int] = None) -> List[Dict]:
if self._X.size == 0:
suggests = self.initial_sample()
return suggests
else:
if 'normalize' in self.config:
self.normalizer = get_normalizer(self.config['normalize'])
Data = {'Target': {'X': self._X, 'Y': self._Y}}
self.update_model(Data)
self.problem = EAProblem(self.search_space.config_space, self.predict)
# Ask the EA for a new population
self.pop = self.ea.ask()
# Model management strategy: select the individuals that need exact evaluation
elites = self.model_manage_strategy().reshape(-1, self.input_dim)
# Exactly evaluate the elite individuals
suggested_sample = self.search_space.zip_inputs(elites)
suggested_sample = ndarray_to_vectors(self._get_var_name('search'), suggested_sample)
design_suggested_sample = self.inverse_transform(suggested_sample)
return design_suggested_sample
def observe(self, input_vectors: Union[List[Dict], Dict], output_value: Union[List[Dict], Dict]) -> None:
self._data_handler.add_observation(input_vectors, output_value)
# Convert dict to list of dict
if isinstance(input_vectors, Dict):
input_vectors = [input_vectors]
if isinstance(output_value, Dict):
output_value = [output_value]
# Check if the lists are empty and return if they are
if len(input_vectors) == 0 and len(output_value) == 0:
return
self._validate_observation('design', input_vectors=input_vectors, output_value=output_value)
X = self.transform(input_vectors)
self._X = np.vstack((self._X, vectors_to_ndarray(self._get_var_name('search'), X))) if self._X.size else vectors_to_ndarray(self._get_var_name('search'), X)
self._Y = np.vstack((self._Y, output_to_ndarray(output_value))) if self._Y.size else output_to_ndarray(output_value)
if self.pop is not None:
self.pop[self.elites_idx].F = output_value
# Return the population to the EA
self.ea.tell(infills=self.pop)
def update_model(self, Data):
assert 'Target' in Data
target_data = Data['Target']
X = target_data['X']
Y = target_data['Y']
if self.normalizer is not None:
Y = self.normalizer(Y)
if self.obj_model is None:
self.create_model(X, Y)
self.problem = EAProblem(self.search_space.config_space, self.predict)
self.create_ea()
else:
self.obj_model.set_XY(X, Y)
try:
self.obj_model.optimize_restarts(num_restarts=1, verbose=self.verbose, robust=True)
except np.linalg.LinAlgError:
# Keep the previous hyperparameters if optimization fails
print('Error: np.linalg.LinAlgError')
def create_model(self, X, Y):
kern = GPy.kern.RBF(self.input_dim, ARD=True)
self.obj_model = GPy.models.GPRegression(X, Y, kernel=kern)
self.obj_model['Gaussian_noise.*variance'].constrain_bounded(1e-9, 1e-3)
def create_ea(self):
if self.ea_name == 'GA':
self.ea = GA(self.pop_num)
elif self.ea_name == 'DE':
self.ea = DE(self.pop_num)
elif self.ea_name == 'PSO':
self.ea = PSO(self.pop_num)
elif self.ea_name == 'CMAES':
self.ea = CMAES(self.pop_num)
self.ea.setup(self.problem, verbose=False)
def predict(self, X):
if X.ndim == 1:
X = X[None, :]
m, v = self.obj_model.predict(X)
return m, v
def sample(self, num_samples: int) -> List[Dict]:
if self.input_dim is None:
raise ValueError("Input dimension is not set. Call set_search_space() to set the input dimension.")
temp = None
if self.init_method == 'latin':
temp = np.random.rand(num_samples, self.input_dim)
for i in range(self.input_dim):
temp[:, i] = (temp[:, i] + np.random.permutation(np.arange(num_samples))) / num_samples
samples = []
for i in range(num_samples):
sample = {}
for j, var_info in enumerate(self.search_space.config_space):
var_name = var_info['name']
var_domain = var_info['domain']
if self.init_method == 'random':
value = np.random.uniform(var_domain[0], var_domain[1])
elif self.init_method == 'latin':
value = temp[i][j] * (var_domain[1] - var_domain[0]) + var_domain[0]
sample[var_name] = value
samples.append(sample)
samples = self.inverse_transform(samples)
return samples
def model_reset(self):
self.obj_model = None
def get_fmin(self):
m, v = self.predict(self.obj_model.X)
return m.min()
def reset(self, task_name:str, design_space:Dict, search_space:Union[None, Dict] = None):
self.set_space(design_space, search_space)
self._X = np.empty((0,)) # Initializes an empty ndarray for input vectors
self._Y = np.empty((0,))
self._data_handler.reset_task(task_name, design_space)
self.sync_data(self._data_handler.get_input_vectors(), self._data_handler.get_output_value())
self.model_reset()
def model_manage_strategy(self):
self.ea.evaluator.eval(self.problem, self.pop)
pop_X = np.array([p.X for p in self.pop])
pop_F = np.array([p.F for p in self.pop])
if self.model_manage == 'best':
top_k_idx = sorted(range(len(pop_F)), key=lambda i: pop_F[i])[:self.k]
elites = pop_X[top_k_idx]
elif self.model_manage == 'pre-select':
total_pop_X = pop_X
total_pop_F = pop_F
for i in range(self.k - 1):
pop = self.ea.ask()
self.ea.evaluator.eval(self.problem, pop)
pop_X = np.array([p.X for p in pop])
pop_F = np.array([p.F for p in pop])
total_pop_X = np.concatenate((total_pop_X, pop_X))
total_pop_F = np.concatenate((total_pop_F, pop_F))
top_k_idx = sorted(range(len(total_pop_F)), key=lambda i: total_pop_F[i])[:self.ini_num]
elites = total_pop_X[top_k_idx]
elif self.model_manage == 'generation':
for i in range(self.k - 1):
pop = self.ea.ask()
self.ea.evaluator.eval(self.problem, pop)
pop_X = np.array([p.X for p in pop])
top_k_idx = range(len(pop_X))
elites = pop_X
else:
raise ValueError(f"Invalid model manage strategy: {self.model_manage}")
self.elites_idx = top_k_idx
return elites
class EAProblem(Problem):
def __init__(self, space, predict):
input_dim = len(space)
xl = []
xu = []
for var_info in space:
var_domain = var_info['domain']
xl.append(var_domain[0])
xu.append(var_domain[1])
xl = np.array(xl)
xu = np.array(xu)
self.predict = predict
super().__init__(n_var=input_dim, n_obj=1, xl=xl, xu=xu)
def _evaluate(self, x, out, *args, **kwargs):
out["F"], _ = self.predict(x)
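When `init_method == 'latin'`, `sample()` above stratifies each dimension into `num_samples` equal bins and places exactly one point per bin. A minimal sketch of that construction (the `latin_hypercube` helper and the `(low, high)` bounds format are illustrative assumptions):

```python
import numpy as np

def latin_hypercube(num_samples, bounds, seed=None):
    # bounds: list of (low, high) pairs, one per dimension (assumed format)
    rng = np.random.default_rng(seed)
    dim = len(bounds)
    u = rng.random((num_samples, dim))          # jitter within each stratum
    out = np.empty_like(u)
    for j in range(dim):
        perm = rng.permutation(num_samples)     # assign each stratum exactly once
        unit = (perm + u[:, j]) / num_samples   # values in [0, 1)
        low, high = bounds[j]
        out[:, j] = low + unit * (high - low)   # rescale to the variable's domain
    return out
```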
================================================
FILE: transopt/optimizer/SingleObjOptimizer/LFL.py
================================================
import numpy as np
import GPy
from paramz import ObsAr
from optimizer.acquisition_function.get_acf import get_ACF
from transopt.optimizer.acquisition_function.sequential import Sequential
from typing import Dict, Union, List
from transopt.optimizer.optimizer_base import BOBase
from transopt.utils.serialization import ndarray_to_vectors
from agent.registry import optimizer_register
from transopt.utils.Kernel import construct_multi_objective_kernel
from transopt.optimizer.model.MPGP import MPGP
from optimizer.model.GP_BAK import PriorGP
from transopt.utils import Prior
from GPy import util
from GPy.inference.latent_function_inference import expectation_propagation
from GPy.inference.latent_function_inference import ExactGaussianInference
from GPy.likelihoods.multioutput_likelihood import MixedNoise
from transopt.utils.Normalization import get_normalizer
@optimizer_register('LFL')
class LFLOptimizer(BOBase):
def __init__(self, config:Dict, **kwargs):
super(LFLOptimizer, self).__init__(config=config)
self.init_method = 'LFL'
self.knowledge_num = 2
self.ini_quantile = 0.5
self.anchor_points = None
self.anchor_num = None
self.model = None
self.output_dim = None
if 'verbose' in config:
self.verbose = config['verbose']
else:
self.verbose = False
if 'init_number' in config:
self.ini_num = config['init_number']
else:
self.ini_num = None
if 'acf' in config:
self.acf = config['acf']
else:
self.acf = 'EI'
def reset(self, design_space:Dict, search_space:Union[None, Dict] = None):
self.set_space(design_space, search_space)
self.obj_model = None
self.var_model = None
self.output_dim = None
self.acquisition = get_ACF(self.acf, model=self, search_space=self.search_space, config=self.config)
self.evaluator = Sequential(self.acquisition)
def initial_sample(self):
if self.anchor_points is None:
self.anchor_num = int(self.ini_quantile * self.ini_num)
self.anchor_points = self.random_sample(self.anchor_num)
random_samples = self.random_sample(self.ini_num - self.anchor_num)
samples = self.anchor_points.copy()
samples.extend(random_samples)
return samples
def random_sample(self, num_samples: int) -> List[Dict]:
"""
Initialize random samples.
:param num_samples: Number of random samples to generate
:return: List of dictionaries, each representing a random sample
"""
if self.input_dim is None:
raise ValueError("Input dimension is not set. Call set_search_space() to set the input dimension.")
random_samples = []
for _ in range(num_samples):
sample = {}
for var_info in self.search_space.config_space:
var_name = var_info['name']
var_domain = var_info['domain']
# Generate a random floating-point number within the specified range
random_value = np.random.uniform(var_domain[0], var_domain[1])
sample[var_name] = random_value
random_samples.append(sample)
random_samples = self.inverse_transform(random_samples)
return random_samples
def combine_data(self):
if len(self.aux_data) == 0:
return {'Target':{'X':self._X, 'Y':self._Y}}
else:
return {}
def suggest(self, n_suggestions:Union[None, int] = None)->List[Dict]:
if self._X.size == 0:
suggests = self.initial_sample()
return suggests
elif self._X.shape[0] < self.ini_num:
pass
else:
if 'normalize' in self.config:
self.normalizer = get_normalizer(self.config['normalize'])
if self.aux_data is not None:
pass
else:
self.aux_data = {}
Data = self.combine_data()
self.update_model(Data)
suggested_sample, acq_value = self.evaluator.compute_batch(None, context_manager=None)
suggested_sample = self.search_space.zip_inputs(suggested_sample)
suggested_sample = ndarray_to_vectors(self._get_var_name('search'),suggested_sample)
design_suggested_sample = self.inverse_transform(suggested_sample)
return design_suggested_sample
def create_model(self, X_list, Y_list, mf=None, prior:list=[]):
X, Y, output_index = util.multioutput.build_XY(X_list, Y_list)
if self.output_dim > 1:
K = construct_multi_objective_kernel(self.input_dim, self.output_dim, base_kernel='RBF', Q=1, rank=2)
inference_method = ExactGaussianInference()
likelihoods_list = [GPy.likelihoods.Gaussian(name="Gaussian_noise_obj_%s" % j) for y, j in
zip(Y, range(self.output_dim))]
likelihood = MixedNoise(likelihoods_list=likelihoods_list)
self.obj_model = MPGP(X, Y, K, likelihood, Y_metadata={'output_index': output_index},
inference_method=inference_method, mean_function=mf, name='OBJ MPGP')
self.obj_model['mixed_noise.Gaussian_noise.*variance'].constrain_bounded(1e-9, 1e-3)
# self.obj_model['constmap.C'].constrain_fixed(0)
self.obj_model['ICM0.B.kappa'].constrain_fixed(np.zeros(shape=(self.output_dim,)))
else:
if 'kernel' in self.config:
kern = GPy.kern.RBF(self.input_dim, ARD=False)
else:
kern = GPy.kern.RBF(self.input_dim, ARD=False)
X = X_list[0]
Y = Y_list[0]
self.obj_model = PriorGP(X, Y, kernel=kern, mean_function = mf)
self.obj_model['Gaussian_noise.*variance'].constrain_bounded(1e-9, 1e-3)
if len(prior) == 0:
self.prior_list = []
self.prior_list.append(Prior.LogGaussian(1, 2, 'lengthscale'))
self.prior_list.append(Prior.LogGaussian(0.5, 2, 'variance'))
else:
self.prior_list = prior
for i in range(len(self.prior_list)):
self.obj_model.set_prior(self.prior_list[i])
def update_model(self, Data):
## Train target model
assert 'Target' in Data
target_data = Data['Target']
X_list = []
Y_list = []
if 'History' in Data:
history_data = Data['History']
X_list.extend(list(history_data['X']))
Y_list.extend(list(history_data['Y']))
source_num = len(history_data['Y'])
else:
source_num = 0
history_data = {}
if 'Gym' in Data:
Gym_data = Data['Gym']
gym_num = len(Gym_data['Y'])
X_list.extend(list(Gym_data['X']))
Y_list.extend(list(Gym_data['Y']))
else:
gym_num = 0
Gym_data = {}
output_dim = gym_num + source_num + 1
X_list.append(target_data['X'])
Y_list.append(target_data['Y'])
if self.normalizer is not None:
Y_list = self.normalizer(Y_list)
if self.output_dim != output_dim:
self.output_dim = output_dim
self.create_model(X_list, Y_list, prior=[])
else:
self.set_XY(X_list, Y_list)
if self.var_model is not None:
self.var_model.set_XY(target_data['X'][0], target_data['Y'][0])
try:
self.obj_model.optimize_restarts(messages=False, num_restarts=1,
verbose=self.verbose)
if self.var_model is not None:
self.var_model.optimize_restarts(messages=False, num_restarts=1,
verbose=self.verbose)
except np.linalg.LinAlgError:
# Keep the previous hyperparameters if optimization fails
print('Error: np.linalg.LinAlgError')
def predict(self, X):
"""
Predictions with the model. Returns posterior means and standard deviations at X. Note that this is different in GPy where the variances are given.
Parameters:
X (np.ndarray) - points to run the prediction for.
with_noise (bool) - whether to add noise to the prediction. Default is True.
"""
if X.ndim == 1:
X = X[None,:]
task_id = self.output_dim - 1
if self.output_dim >1:
noise_dict = {'output_index': np.array([task_id] * X.shape[0])[:,np.newaxis].astype(int)}
X = np.hstack((X, noise_dict['output_index']))
m, v = self.obj_model.predict(X, Y_metadata=noise_dict, full_cov=False, include_likelihood=True)
v = np.clip(v, 1e-10, np.inf)
else:
m, v = self.obj_model.predict(X)
# We can take the square root because v is just a diagonal matrix of variances
return m, v
def var_predict(self, X):
if X.ndim == 1:
X = X[None,:]
task_id = self.output_dim - 1
if self.model_name == 'MOGP':
noise_dict = {'output_index': np.array([task_id] * X.shape[0])[:,np.newaxis].astype(int)}
X = np.hstack((X, noise_dict['output_index']))
_, v1 = self.var_model.predict(X)
v1 = np.clip(v1, 1e-10, np.inf)
v = v1
else:
m, v = self.obj_model.predict(X)
# We can take the square root because v is just a diagonal matrix of variances
return v
def obj_posterior_samples(self, X, sample_size):
if X.ndim == 1:
X = X[None,:]
task_id = self.output_dim - 1
if self.model_name == 'SHGP' or \
self.model_name == 'HGP' or \
self.model_name == 'MHGP' or \
self.model_name == 'BHGP' or \
self.model_name == 'RPGE':
samples_obj = self.posterior_samples(X, model_id=0,size=sample_size)
elif self.model_name == 'MOGP':
noise_dict = {'output_index': np.array([task_id] * X.shape[0])[:, np.newaxis].astype(int)}
X_zip = np.hstack((X, noise_dict['output_index']))
samples_obj = self.obj_model.posterior_samples(X_zip, size=sample_size, Y_metadata=noise_dict) # grid * 1 * sample_num
else:
raise NameError
return samples_obj
def get_fmin(self):
"Get the minimum of the current model."
m, v = self.predict(self.obj_model.X)
return m.min()
def set_XY(self, X=None, Y=None):
if isinstance(X, list):
X, _, self.obj_model.output_index = util.multioutput.build_XY(X, None)
if isinstance(Y, list):
_, Y, self.obj_model.output_index = util.multioutput.build_XY(Y, Y)
self.obj_model.update_model(False)
if Y is not None:
self.obj_model.Y = ObsAr(Y)
self.obj_model.Y_normalized = self.obj_model.Y
if X is not None:
self.obj_model.X = ObsAr(X)
self.obj_model.Y_metadata = {'output_index': self.obj_model.output_index, 'trials': np.ones(self.obj_model.output_index.shape)}
if isinstance(self.obj_model.inference_method, expectation_propagation.EP):
self.obj_model.inference_method.reset()
self.obj_model.update_model(True)
def samples(self, gp):
"""
Returns a set of samples of observations based on a given value of the latent variable.
:param gp: latent variable
"""
orig_shape = gp.shape
gp = gp.flatten()
Ysim = np.array([np.random.normal(gpj, scale=np.sqrt(1e-2), size=1) for gpj in gp])
return Ysim.reshape(orig_shape)
def posterior_samples_f(self,X, model_id, size=10):
"""
Samples the posterior GP at the points X.
:param X: The points at which to take the samples.
:type X: np.ndarray (Nnew x self.input_dim)
:param size: the number of a posteriori samples.
:type size: int.
:returns: set of simulations
:rtype: np.ndarray (Nnew x D x samples)
"""
m, v = self.obj_model.predict(X, return_full=True)
def sim_one_dim(m, v):
return np.random.multivariate_normal(m, v, size).T
return sim_one_dim(m.flatten(), v)[:, np.newaxis, :]
def posterior_samples(self, X, model_id, size=10):
"""
Samples the posterior GP at the points X.
:param X: the points at which to take the samples.
:type X: np.ndarray (Nnew x self.input_dim.)
:param size: the number of a posteriori samples.
:type size: int.
:param noise_model: for mixed noise likelihood, the noise model to use in the samples.
:type noise_model: integer.
:returns: Ysim: set of simulations,
:rtype: np.ndarray (D x N x samples) (if D==1 we flatten out the first dimension)
"""
fsim = self.posterior_samples_f(X, model_id=model_id, size=size)
if fsim.ndim == 3:
for d in range(fsim.shape[1]):
fsim[:, d] = self.samples(fsim[:, d])
else:
fsim = self.samples(fsim)
return fsim
def get_model_para(self):
if self.model_name == 'MOGP':
lengthscale = self.obj_model['.*lengthscale'][0]
variance = self.obj_model['.*rbf.*variance'][0]
else:
lengthscale = self.obj_model['rbf.*lengthscale'][0]
variance = self.obj_model['rbf.*variance'][0]
return lengthscale, variance
def update_prior(self, parameters):
for k, v in parameters.items():
prior = self.obj_model.get_prior(k)
cur_stat = prior.getstate()
# mu = (self.kappa * cur_stat[0] + v) / (self.kappa + 1)
# var = cur_stat[1] + (self.kappa * (v - cur_stat[0]) ** 2) / (2.0 * (self.kappa + 1.0))
mu = np.mean(parameters[k])
var = np.var(parameters[k])
self.obj_model.update_prior(k, [mu, var])
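`posterior_samples_f` and `samples` above first draw joint function values from the GP posterior via a multivariate normal, then perturb them with fixed-variance observation noise. A condensed sketch of the same two steps (`posterior_draws` is an illustrative helper; the noise variance 1e-2 matches the value hard-coded in `samples`):

```python
import numpy as np

def posterior_draws(mean, cov, size, noise_var=1e-2, seed=None):
    rng = np.random.default_rng(seed)
    # Step 1: joint draws of latent function values, shape (N, size)
    f = rng.multivariate_normal(mean, cov, size=size).T
    # Step 2: add i.i.d. Gaussian observation noise
    return f + rng.normal(scale=np.sqrt(noise_var), size=f.shape)
```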
================================================
FILE: transopt/optimizer/SingleObjOptimizer/MetaLearningOptimizer.py
================================================
import numpy as np
import GPy
import GPyOpt
from GPy import util
from paramz import ObsAr
from GPy.inference.latent_function_inference import expectation_propagation
from transopt.optimizer.optimizer_base import OptimizerBase
# from Model.HyperBO import hyperbo
from external.transfergpbo import models
from emukit.core import ContinuousParameter
from emukit.core import ParameterSpace
from external.FSBO.fsbo_modules import FSBO, DeepKernelGP
from external.FSBO.fsbo_utils import totorch
import os
from external.transfergpbo.models import (
WrapperBase,
MHGP,
SHGP,
BHGP,
)
def get_model(
model_name: str, space: ParameterSpace
) -> WrapperBase:
"""Create the model object."""
model_class = getattr(models, model_name)
if model_class == MHGP or model_class == SHGP or model_class == BHGP:
model = model_class(space.dimensionality)
else:
kernel = GPy.kern.RBF(space.dimensionality)
model = model_class(kernel=kernel)
model = WrapperBase(model)
return model
class MetaBOOptimizer(OptimizerBase):
analytical_gradient_prediction = False  # --- Needed in all models to check whether the gradients of acquisitions are computable.
def __init__(self, Xdim, bounds, kernel='RBF', likelihood=None, model_name='MOGP', acf_name='EI',
optimizer='bfgs', verbose=True, seed = 0):
self.kernel = kernel
self.likelihood = likelihood
self.Xdim = Xdim
self.bounds = bounds
self.acf_name = acf_name
self.Seed = seed
self.name = 'meta'
# Set decision space
Variables = []
task_design_space = []
for var in range(Xdim):
v_n = f'x{var + 1}'
Variables.append(ContinuousParameter(v_n, self.bounds[0][var], self.bounds[1][var]))
var_dic = {'name': f'var_{var}', 'type': 'continuous',
'domain': tuple([self.bounds[0][var], self.bounds[1][var]])}
task_design_space.append(var_dic.copy())
self.model_space = ParameterSpace(Variables)
self.acf_space = GPyOpt.Design_space(space=task_design_space)
self.optimizer = optimizer
self.verbose = verbose
def create_model(self, model_name, Meta_data, Target_data):
self.model_name = model_name
source_num = len(Meta_data['Y'])
self.output_dim = source_num + 1
###Construct objective model
if self.model_name == 'HyperBO':
self.obj_model = hyperbo()
self.obj_model.pretrain(Meta_data, Target_data)
elif self.model_name == 'FSBO':
checkpoint_path = './External/FSBO/checkpoints/'
self.training_model = FSBO(input_size=self.Xdim, checkpoint_path = checkpoint_path, batch_size=len(Meta_data['X'][0]))
train_data = {}
for i in range(source_num):
train_data[i] = {'X':Meta_data['X'][i], 'y':Meta_data['Y'][i]}
self.training_model.set_data(train_data=train_data)
self.training_model.meta_train(epochs=1000)
log_dir = os.path.join(checkpoint_path, "log.txt")
self.obj_model = DeepKernelGP(epochs = 1000, input_size=self.Xdim, checkpoint = checkpoint_path + f'Seed_{self.Seed}_{source_num+1}', log_dir= log_dir, seed=self.Seed)
self.device = 'cpu'
self.obj_model.X_obs, self.obj_model.y_obs = totorch(Target_data['X'], self.device), totorch(Target_data['Y'], self.device).reshape(-1)
self.obj_model.train()
else:
if self.kernel is None or self.kernel == 'RBF':
kern = GPy.kern.RBF(self.Xdim, ARD=True)
else:
kern = GPy.kern.RBF(self.Xdim, ARD=True)
X = Target_data['X']
Y = Target_data['Y']
self.obj_model = GPy.models.GPRegression(X, Y, kernel=kern)
self.obj_model['Gaussian_noise.*variance'].constrain_bounded(1e-9, 1e-3)
try:
self.obj_model.optimize_restarts(messages=True, num_restarts=1, verbose=self.verbose)
except np.linalg.LinAlgError:
# Keep the previous hyperparameters if optimization fails
print('Error: np.linalg.LinAlgError')
def updateModel(self, Target_data):
###Construct objective model
if self.model_name == 'HyperBO':
self.obj_model.retrain(Target_data)
elif self.model_name == 'FSBO':
self.obj_model.X_obs, self.obj_model.y_obs = totorch(Target_data['X'], self.device), totorch(
Target_data['Y'], self.device).reshape(-1)
self.obj_model.train()
else:
X = Target_data['X']
Y = Target_data['Y']
self.obj_model.set_XY(X, Y)
try:
self.obj_model.optimize_restarts(messages=True, num_restarts=1,
verbose=self.verbose)
except np.linalg.LinAlgError:
# Keep the previous hyperparameters if optimization fails
print('Error: np.linalg.LinAlgError')
def resetModel(self, Source_data, Target_data):
## Train target model
pass
def predict(self, X):
"""
Predictions with the model. Returns posterior means and standard deviations at X. Note that this is different in GPy where the variances are given.
Parameters:
X (np.ndarray) - points to run the prediction for.
with_noise (bool) - whether to add noise to the prediction. Default is True.
"""
if self.model_name == 'HyperBO':
m, v = self.obj_model.predict(X)
m = np.array(m)
v = np.array(v)
elif self.model_name == 'FSBO':
X = totorch(X, self.device)
m,v = self.obj_model.predict(X)
m = m[:,np.newaxis]
v = v[:,np.newaxis]
else:
m, v = self.obj_model.predict(X)
# We can take the square root because v is just a diagonal matrix of variances
return m, v
#
# def obj_posterior_samples(self, X, sample_size):
# if X.ndim == 1:
# X = X[None,:]
# task_id = self.output_dim - 1
#
# if self.model_name == 'WSGP' or \
# self.model_name == 'HGP':
# samples_obj = self.posterior_samples(X, model_id=0,size=sample_size)
# elif self.model_name == 'MOGP':
# noise_dict = {'output_index': np.array([task_id] * X.shape[0])[:, np.newaxis].astype(int)}
# X_zip = np.hstack((X, noise_dict['output_index']))
#
# samples_obj = self.obj_model.posterior_samples(X_zip, size=sample_size, Y_metadata=noise_dict) # grid * 1 * sample_num
#
# else:
# raise NameError
#
# return samples_obj
def get_fmin(self):
"Get the minimum of the current model."
if self.model_name == 'HyperBO':
m = np.array(self.obj_model._Y)
return np.min(m)
elif self.model_name == 'FSBO':
m = self.obj_model.y_obs.detach().to("cpu").numpy().reshape(-1,)
return np.min(m)
else:
m, v = self.predict(self.obj_model.X)
return m.min()
def set_XY(self, X=None, Y=None):
if isinstance(X, list):
X, _, self.obj_model.output_index = util.multioutput.build_XY(X, None)
if isinstance(Y, list):
_, Y, self.obj_model.output_index = util.multioutput.build_XY(Y, Y)
self.obj_model.update_model(False)
if Y is not None:
self.obj_model.Y = ObsAr(Y)
self.obj_model.Y_normalized = self.obj_model.Y
if X is not None:
self.obj_model.X = ObsAr(X)
self.obj_model.Y_metadata = {'output_index': self.obj_model.output_index, 'trials': np.ones(self.obj_model.output_index.shape)}
if isinstance(self.obj_model.inference_method, expectation_propagation.EP):
self.obj_model.inference_method.reset()
self.obj_model.update_model(True)
def samples(self, gp):
"""
Returns a set of samples of observations based on a given value of the latent variable.
:param gp: latent variable
"""
orig_shape = gp.shape
gp = gp.flatten()
Ysim = np.array([np.random.normal(gpj, scale=np.sqrt(1e-2), size=1) for gpj in gp])
return Ysim.reshape(orig_shape)
def posterior_samples_f(self,X, model_id, size=10):
"""
Samples the posterior GP at the points X.
:param X: The points at which to take the samples.
:type X: np.ndarray (Nnew x self.input_dim)
:param size: the number of a posteriori samples.
:type size: int.
:returns: set of simulations
:rtype: np.ndarray (Nnew x D x samples)
"""
m, v = self.obj_model.predict(X, return_full=True)
def sim_one_dim(m, v):
return np.random.multivariate_normal(m, v, size).T
return sim_one_dim(m.flatten(), v)[:, np.newaxis, :]
def posterior_samples(self, X, model_id, size=10):
"""
Samples the posterior GP at the points X.
:param X: the points at which to take the samples.
:type X: np.ndarray (Nnew x self.input_dim.)
:param size: the number of a posteriori samples.
:type size: int.
:param noise_model: for mixed noise likelihood, the noise model to use in the samples.
:type noise_model: integer.
:returns: Ysim: set of simulations,
:rtype: np.ndarray (D x N x samples) (if D==1 we flatten out the first dimension)
"""
fsim = self.posterior_samples_f(X, model_id=model_id, size=size)
if fsim.ndim == 3:
for d in range(fsim.shape[1]):
fsim[:, d] = self.samples(fsim[:, d])
else:
fsim = self.samples(fsim)
return fsim
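`get_model` at the top of this file resolves a model class by name with `getattr(models, model_name)` and then wraps it. The dispatch-by-name pattern, with hypothetical stand-in classes in place of the external transfergpbo models:

```python
import types

class MHGP:
    # Hypothetical dimension-parameterized model, like the MHGP/SHGP/BHGP branch
    def __init__(self, dim):
        self.dim = dim

class KernelModel:
    # Hypothetical kernel-parameterized model, like the default branch
    def __init__(self, kernel=None):
        self.kernel = kernel

# Stand-in namespace for the external 'models' module resolved via getattr
models = types.SimpleNamespace(MHGP=MHGP, KernelModel=KernelModel)

def resolve_model(name, dim):
    # Look up the class by name; dimension-parameterized classes receive 'dim'
    cls = getattr(models, name, None)
    if cls is None:
        raise ValueError(f"unknown model: {name}")
    return cls(dim) if cls is MHGP else cls()
```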
================================================
FILE: transopt/optimizer/SingleObjOptimizer/MultitaskOptimizer.py
================================================
import numpy as np
import GPy
from typing import Dict, Union, List
from transopt.optimizer.optimizer_base import BOBase
from transopt.utils.serialization import ndarray_to_vectors
from agent.registry import optimizer_register
from paramz import ObsAr
from transopt.utils.Normalization import get_normalizer
from GPy import util
from transopt.utils.Kernel import construct_multi_objective_kernel
from GPy.inference.latent_function_inference import expectation_propagation
from GPy.inference.latent_function_inference import ExactGaussianInference
from GPy.likelihoods.multioutput_likelihood import MixedNoise
from transopt.optimizer.model.MPGP import MPGP
@optimizer_register('MTBO')
class MultitaskBO(BOBase):
def __init__(self, config:Dict, **kwargs):
super(MultitaskBO, self).__init__(config=config)
self.init_method = 'Random'
self.model = None
if 'verbose' in config:
self.verbose = config['verbose']
else:
self.verbose = True
if 'init_number' in config:
self.ini_num = config['init_number']
else:
self.ini_num = None
if 'acf' in config:
self.acf = config['acf']
else:
self.acf = 'EI'
def initial_sample(self):
return self.random_sample(self.ini_num)
def random_sample(self, num_samples: int) -> List[Dict]:
"""
Initialize random samples.
:param num_samples: Number of random samples to generate
:return: List of dictionaries, each representing a random sample
"""
if self.input_dim is None:
raise ValueError("Input dimension is not set. Call set_search_space() to set the input dimension.")
random_samples = []
for _ in range(num_samples):
sample = {}
for var_info in self.search_space.config_space:
var_name = var_info['name']
var_domain = var_info['domain']
# Generate a random floating-point number within the specified range
random_value = np.random.uniform(var_domain[0], var_domain[1])
sample[var_name] = random_value
random_samples.append(sample)
random_samples = self.inverse_transform(random_samples)
return random_samples
def suggest(self, n_suggestions:Union[None, int] = None) ->List[Dict]:
if self._X.size == 0:
suggests = self.initial_sample()
return suggests
elif self._X.shape[0] < self.ini_num:
pass
else:
if 'normalize' in self.config:
self.normalizer = get_normalizer(self.config['normalize'])
if len(self.aux_data):
Data = self.aux_data
else:
Data = {}
Data['Target'] = {'X':self._X, 'Y':self._Y}
self.update_model(Data)
suggested_sample, acq_value = self.evaluator.compute_batch(None, context_manager=None)
suggested_sample = self.search_space.zip_inputs(suggested_sample)
suggested_sample = ndarray_to_vectors(self._get_var_name('search'), suggested_sample)
design_suggested_sample = self.inverse_transform(suggested_sample)
return design_suggested_sample
def update_model(self, Data):
assert 'Target' in Data
X_list = []
Y_list = []
if 'History' in Data:
history_data = Data['History']
X_list.extend(list(history_data['X']))
Y_list.extend(list(history_data['Y']))
target_data = Data['Target']
X_list.append(target_data['X'])
Y_list.append(target_data['Y'])
if self.normalizer is not None:
Y_list = self.normalizer(Y_list)
self.output_dim = len(Y_list)
self.task_id = self.output_dim - 1
        if self.obj_model is None:
self.create_model(X_list, Y_list)
else:
if self.output_dim > 1:
self.set_XY(X_list, Y_list)
else:
self.obj_model.set_XY(X_list[0], Y_list[0])
        try:
            self.obj_model.optimize_restarts(num_restarts=1, verbose=self.verbose, robust=True)
        except np.linalg.LinAlgError:
            # Hyperparameter optimization failed; keep the previous parameters.
            print('Error: np.linalg.LinAlgError')
def create_model(self, X_list, Y_list, mf=None, prior:list=[]):
if self.output_dim > 1:
X, Y, output_index = util.multioutput.build_XY(X_list, Y_list)
#Set inference Method
inference_method = ExactGaussianInference()
## Set likelihood
likelihoods_list = [GPy.likelihoods.Gaussian(name="Gaussian_noise_obj_%s" % j) for y, j in
zip(Y, range(self.output_dim))]
likelihood = MixedNoise(likelihoods_list=likelihoods_list)
kernel = construct_multi_objective_kernel(self.input_dim, output_dim=self.output_dim, base_kernel='RBF', rank=self.output_dim)
            self.obj_model = MPGP(X, Y, kernel, likelihood, Y_metadata={'output_index': output_index}, inference_method=inference_method, name='OBJ MPGP')
        else:
            # NOTE: a configurable kernel ('kernel' key in config) is not yet
            # honored; both former branches built the same RBF kernel.
            kern = GPy.kern.RBF(self.input_dim, ARD=False)
X = X_list[0]
Y = Y_list[0]
self.obj_model = GPy.models.GPRegression(X, Y, kernel=kern)
self.obj_model['Gaussian_noise.*variance'].constrain_bounded(1e-9, 1e-3)
def set_XY(self, X=None, Y=None):
if isinstance(X, list):
X, _, self.obj_model.output_index = util.multioutput.build_XY(X, None)
if isinstance(Y, list):
_, Y, self.obj_model.output_index = util.multioutput.build_XY(Y, Y)
self.obj_model.update_model(False)
if Y is not None:
self.obj_model.Y = ObsAr(Y)
self.obj_model.Y_normalized = self.obj_model.Y
if X is not None:
self.obj_model.X = ObsAr(X)
self.obj_model.Y_metadata = {'output_index': self.obj_model.output_index, 'trials': np.ones(self.obj_model.output_index.shape)}
if isinstance(self.obj_model.inference_method, expectation_propagation.EP):
self.obj_model.inference_method.reset()
self.obj_model.update_model(True)
def model_reset(self):
self.obj_model = None
def predict(self, X):
"""
Predictions with the model. Returns posterior means and standard deviations at X. Note that this is different in GPy where the variances are given.
Parameters:
X (np.ndarray) - points to run the prediction for.
with_noise (bool) - whether to add noise to the prediction. Default is True.
"""
if X.ndim == 1:
X = X[None,:]
if self.output_dim > 1:
noise_dict = {'output_index': np.array([self.task_id] * X.shape[0])[:,np.newaxis].astype(int)}
X = np.hstack((X, noise_dict['output_index']))
m, v = self.obj_model.predict(X, Y_metadata=noise_dict, full_cov=False, include_likelihood=True)
v = np.clip(v, 1e-10, np.inf)
else:
m, v = self.obj_model.predict(X)
        # v holds the (diagonal) predictive variances; no square root is taken here.
return m, v
def get_fmin(self):
"Get the minimum of the current model."
m, v = self.predict(self.obj_model.X)
return m.min()
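`update_model` above relies on `GPy.util.multioutput.build_XY` to stack per-task data into a single design matrix with a task-index column, which is what lets one coregionalized GP serve several tasks at once. A rough numpy sketch of that stacking (the `build_xy` helper here is illustrative):

```python
import numpy as np

def build_xy(X_list, Y_list):
    # Stack per-task blocks and append a task-index column to X,
    # mirroring what GPy.util.multioutput.build_XY produces.
    index = np.vstack(
        [np.full((x.shape[0], 1), i) for i, x in enumerate(X_list)]
    )
    X = np.hstack([np.vstack(X_list), index])
    Y = np.vstack(Y_list)
    return X, Y, index

X_hist, Y_hist = np.random.rand(5, 2), np.random.rand(5, 1)  # source task
X_tgt, Y_tgt = np.random.rand(3, 2), np.random.rand(3, 1)    # target task
X, Y, idx = build_xy([X_hist, X_tgt], [Y_hist, Y_tgt])
print(X.shape, Y.shape)  # (8, 3) (8, 1)
```

The last column of `X` is the task index the mixed-noise likelihood and coregionalization kernel key on.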
================================================
FILE: transopt/optimizer/SingleObjOptimizer/PROptimizer.py
================================================
import GPy
import numpy as np
from pymoo.core.problem import Problem
from pymoo.algorithms.soo.nonconvex.ga import GA
from pymoo.algorithms.soo.nonconvex.de import DE
from pymoo.algorithms.soo.nonconvex.cmaes import CMAES
from pymoo.algorithms.soo.nonconvex.pso import PSO
from typing import Dict, Union, List
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from transopt.optimizer.optimizer_base import BOBase
from transopt.utils.serialization import vectors_to_ndarray, output_to_ndarray
from transopt.utils.serialization import ndarray_to_vectors
from agent.registry import optimizer_register
from transopt.utils.Normalization import get_normalizer
@optimizer_register('PREA')
class PREA(BOBase):
def __init__(self, config: Dict, **kwargs):
super(PREA, self).__init__(config=config)
self.init_method = 'latin'
self.model = None
self.ea = None
self.problem = None
if 'verbose' in config:
self.verbose = config['verbose']
else:
self.verbose = True
if 'init_number' in config:
self.ini_num = config['init_number']
else:
self.ini_num = None
if 'ea' in config:
self.ea_name = config['ea']
else:
self.ea_name = 'GA'
if 'degree' in config:
self.degree = config['degree']
else:
self.degree = 10
# model_manage: 'best' or 'pre-select' or 'generation'
if 'model_manage' in config:
self.model_manage = config['model_manage']
else:
self.model_manage = 'best'
# 'best':k best individual, 'pre-select' and 'generation': every k generation
if 'k' in config:
self.k = config['k']
else:
self.k = 1
self.pop = None
self.pop_num = self.ini_num
def initial_sample(self):
return self.sample(self.ini_num)
def suggest(self, n_suggestions: Union[None, int] = None) -> List[Dict]:
if self._X.size == 0:
suggests = self.initial_sample()
return suggests
else:
if 'normalize' in self.config:
self.normalizer = get_normalizer(self.config['normalize'])
Data = {'Target': {'X': self._X, 'Y': self._Y}}
self.update_model(Data)
self.problem = EAProblem(self.search_space.config_space, self.predict)
            # Ask the EA for a new population
            self.pop = self.ea.ask()
            # Model-management strategy: select the individuals that need exact evaluation
            elites = self.model_manage_strategy().reshape(-1, self.input_dim)
            # Evaluate the elite individuals exactly
suggested_sample = self.search_space.zip_inputs(elites)
suggested_sample = ndarray_to_vectors(self._get_var_name('search'), suggested_sample)
design_suggested_sample = self.inverse_transform(suggested_sample)
return design_suggested_sample
def observe(self, input_vectors: Union[List[Dict], Dict], output_value: Union[List[Dict], Dict]) -> None:
self._data_handler.add_observation(input_vectors, output_value)
# Convert dict to list of dict
if isinstance(input_vectors, Dict):
input_vectors = [input_vectors]
if isinstance(output_value, Dict):
output_value = [output_value]
# Check if the lists are empty and return if they are
if len(input_vectors) == 0 and len(output_value) == 0:
return
self._validate_observation('design', input_vectors=input_vectors, output_value=output_value)
X = self.transform(input_vectors)
self._X = np.vstack(
(self._X, vectors_to_ndarray(self._get_var_name('search'), X))) if self._X.size else vectors_to_ndarray(
self._get_var_name('search'), X)
self._Y = np.vstack((self._Y, output_to_ndarray(output_value))) if self._Y.size else output_to_ndarray(
output_value)
if self.pop is not None:
self.pop[self.elites_idx].F = output_value
            # Hand the population back to the EA
self.ea.tell(infills=self.pop)
def update_model(self, Data):
assert 'Target' in Data
target_data = Data['Target']
X = target_data['X']
Y = target_data['Y']
if self.normalizer is not None:
Y = self.normalizer(Y)
if self.obj_model is None:
self.create_model(X, Y)
self.problem = EAProblem(self.search_space.config_space, self.predict)
self.create_ea()
else:
X_poly = self.poly_features.fit_transform(X)
self.obj_model.fit(X_poly, Y)
def create_model(self, X, Y):
self.poly_features = PolynomialFeatures(self.degree)
X_poly = self.poly_features.fit_transform(X)
self.obj_model = LinearRegression()
self.obj_model.fit(X_poly, Y)
def create_ea(self):
if self.ea_name == 'GA':
self.ea = GA(self.pop_num)
elif self.ea_name == 'DE':
self.ea = DE(self.pop_num)
elif self.ea_name == 'PSO':
self.ea = PSO(self.pop_num)
elif self.ea_name == 'CMAES':
self.ea = CMAES(self.pop_num)
self.ea.setup(self.problem, verbose=False)
def predict(self, X):
if X.ndim == 1:
X = X[None, :]
X_poly = self.poly_features.transform(X)
Y = self.obj_model.predict(X_poly)
return Y, None
def sample(self, num_samples: int) -> List[Dict]:
if self.input_dim is None:
raise ValueError("Input dimension is not set. Call set_search_space() to set the input dimension.")
temp = None
if self.init_method == 'latin':
temp = np.random.rand(num_samples, self.input_dim)
for i in range(self.input_dim):
temp[:, i] = (temp[:, i] + np.random.permutation(np.arange(num_samples))) / num_samples
samples = []
for i in range(num_samples):
sample = {}
for j, var_info in enumerate(self.search_space.config_space):
var_name = var_info['name']
var_domain = var_info['domain']
if self.init_method == 'random':
value = np.random.uniform(var_domain[0], var_domain[1])
elif self.init_method == 'latin':
value = temp[i][j] * (var_domain[1] - var_domain[0]) + var_domain[0]
sample[var_name] = value
samples.append(sample)
samples = self.inverse_transform(samples)
return samples
def model_reset(self):
self.obj_model = None
def get_fmin(self):
m, _ = self.predict(self._X)
return m.min()
    def reset(self, task_name: str, design_space: Dict, search_space: Union[None, Dict] = None):
        self.set_space(design_space, search_space)
self._X = np.empty((0,)) # Initializes an empty ndarray for input vectors
self._Y = np.empty((0,))
self._data_handler.reset_task(task_name, design_space)
self.sync_data(self._data_handler.get_input_vectors(), self._data_handler.get_output_value())
self.model_reset()
def model_manage_strategy(self):
self.ea.evaluator.eval(self.problem, self.pop)
pop_X = np.array([p.X for p in self.pop])
pop_F = np.array([p.F for p in self.pop])
if self.model_manage == 'best':
top_k_idx = sorted(range(len(pop_F)), key=lambda i: pop_F[i])[:self.k]
            elites = pop_X[top_k_idx]
elif self.model_manage == 'pre-select':
total_pop_X = pop_X
total_pop_F = pop_F
for i in range(self.k - 1):
pop = self.ea.ask()
self.ea.evaluator.eval(self.problem, pop)
pop_X = np.array([p.X for p in pop])
pop_F = np.array([p.F for p in pop])
total_pop_X = np.concatenate((total_pop_X, pop_X))
total_pop_F = np.concatenate((total_pop_F, pop_F))
top_k_idx = sorted(range(len(total_pop_F)), key=lambda i: total_pop_F[i])[:self.ini_num]
elites = total_pop_X[top_k_idx]
elif self.model_manage == 'generation':
for i in range(self.k - 1):
pop = self.ea.ask()
self.ea.evaluator.eval(self.problem, pop)
pop_X = np.array([p.X for p in pop])
top_k_idx = range(len(pop_X))
elites = pop_X
else:
raise ValueError(f"Invalid model manage strategy: {self.model_manage}")
self.elites_idx = top_k_idx
return elites
class EAProblem(Problem):
def __init__(self, space, predict):
input_dim = len(space)
xl = []
xu = []
for var_info in space:
var_domain = var_info['domain']
xl.append(var_domain[0])
xu.append(var_domain[1])
xl = np.array(xl)
xu = np.array(xu)
self.predict = predict
super().__init__(n_var=input_dim, n_obj=1, xl=xl, xu=xu)
def _evaluate(self, x, out, *args, **kwargs):
out["F"], _ = self.predict(x)
================================================
FILE: transopt/optimizer/SingleObjOptimizer/RBFNOptimizer.py
================================================
import GPy
import torch
import numpy as np
from pymoo.core.problem import Problem
from pymoo.algorithms.soo.nonconvex.ga import GA
from pymoo.algorithms.soo.nonconvex.de import DE
from pymoo.algorithms.soo.nonconvex.cmaes import CMAES
from pymoo.algorithms.soo.nonconvex.pso import PSO
from typing import Dict, Union, List
from transopt.optimizer.optimizer_base import BOBase
from transopt.utils.serialization import vectors_to_ndarray, output_to_ndarray
from transopt.utils.serialization import ndarray_to_vectors
from agent.registry import optimizer_register
from transopt.utils.Normalization import get_normalizer
from transopt.optimizer.model.RBFN import RBFN, RegressionDataset
@optimizer_register('RbfnEA')
class RbfnEA(BOBase):
def __init__(self, config: Dict, **kwargs):
super(RbfnEA, self).__init__(config=config)
self.init_method = 'latin'
self.model = None
self.ea = None
self.problem = None
if 'verbose' in config:
self.verbose = config['verbose']
else:
self.verbose = True
if 'init_number' in config:
self.ini_num = config['init_number']
else:
self.ini_num = None
if 'ea' in config:
self.ea_name = config['ea']
else:
self.ea_name = 'GA'
if 'max_epoch' in config:
self.max_epoch = config['max_epoch']
else:
self.max_epoch = 10
if 'batch_size' in config:
self.batch_size = config['batch_size']
else:
self.batch_size = 1
if 'lr' in config:
self.lr = config['lr']
else:
self.lr = 0.01
if 'num_centers' in config:
self.num_centers = config['num_centers']
else:
self.num_centers = 10
# model_manage: 'best' or 'pre-select' or 'generation'
if 'model_manage' in config:
self.model_manage = config['model_manage']
else:
self.model_manage = 'best'
# 'best':k best individual, 'pre-select' and 'generation': every k generation
if 'k' in config:
self.k = config['k']
else:
self.k = 1
self.pop = None
self.pop_num = self.ini_num
def initial_sample(self):
return self.sample(self.ini_num)
def suggest(self, n_suggestions: Union[None, int] = None) -> List[Dict]:
if self._X.size == 0:
suggests = self.initial_sample()
return suggests
else:
if 'normalize' in self.config:
self.normalizer = get_normalizer(self.config['normalize'])
Data = {'Target': {'X': self._X, 'Y': self._Y}}
self.update_model(Data)
self.problem = EAProblem(self.search_space.config_space, self.predict)
            # Ask the EA for a new population
            self.pop = self.ea.ask()
            # Model-management strategy: select the individuals that need exact evaluation
            elites = self.model_manage_strategy().reshape(-1, self.input_dim)
            # Evaluate the elite individuals exactly
suggested_sample = self.search_space.zip_inputs(elites)
suggested_sample = ndarray_to_vectors(self._get_var_name('search'), suggested_sample)
design_suggested_sample = self.inverse_transform(suggested_sample)
return design_suggested_sample
def observe(self, input_vectors: Union[List[Dict], Dict], output_value: Union[List[Dict], Dict]) -> None:
self._data_handler.add_observation(input_vectors, output_value)
# Convert dict to list of dict
if isinstance(input_vectors, Dict):
input_vectors = [input_vectors]
if isinstance(output_value, Dict):
output_value = [output_value]
# Check if the lists are empty and return if they are
if len(input_vectors) == 0 and len(output_value) == 0:
return
self._validate_observation('design', input_vectors=input_vectors, output_value=output_value)
X = self.transform(input_vectors)
self._X = np.vstack((self._X, vectors_to_ndarray(self._get_var_name('search'), X))) if self._X.size else vectors_to_ndarray(self._get_var_name('search'), X)
self._Y = np.vstack((self._Y, output_to_ndarray(output_value))) if self._Y.size else output_to_ndarray(output_value)
if self.pop is not None:
self.pop[self.elites_idx].F = output_value
            # Hand the population back to the EA
self.ea.tell(infills=self.pop)
def update_model(self, Data):
assert 'Target' in Data
target_data = Data['Target']
X = target_data['X']
Y = target_data['Y']
if self.normalizer is not None:
Y = self.normalizer(Y)
if self.obj_model is None:
self.create_model(X, Y)
self.problem = EAProblem(self.search_space.config_space, self.predict)
self.create_ea()
else:
dataset = RegressionDataset(torch.from_numpy(X), torch.from_numpy(Y))
self.obj_model.update_dataset(dataset)
try:
self.obj_model.train()
except np.linalg.LinAlgError as e:
# break
print('Error: np.linalg.LinAlgError')
def create_model(self, X, Y):
dataset = RegressionDataset(torch.from_numpy(X), torch.from_numpy(Y))
self.obj_model = RBFN(dataset=dataset,
max_epoch=self.max_epoch,
batch_size=self.batch_size,
lr=self.lr,
num_centers=self.num_centers)
def create_ea(self):
if self.ea_name == 'GA':
self.ea = GA(self.pop_num)
elif self.ea_name == 'DE':
self.ea = DE(self.pop_num)
elif self.ea_name == 'PSO':
self.ea = PSO(self.pop_num)
elif self.ea_name == 'CMAES':
self.ea = CMAES(self.pop_num)
self.ea.setup(self.problem, verbose=False)
def predict(self, X):
if X.ndim == 1:
X = X[None, :]
Y = self.obj_model.predict(X)
return Y, None
def sample(self, num_samples: int) -> List[Dict]:
if self.input_dim is None:
raise ValueError("Input dimension is not set. Call set_search_space() to set the input dimension.")
temp = None
if self.init_method == 'latin':
temp = np.random.rand(num_samples, self.input_dim)
for i in range(self.input_dim):
temp[:, i] = (temp[:, i] + np.random.permutation(np.arange(num_samples))) / num_samples
samples = []
for i in range(num_samples):
sample = {}
for j, var_info in enumerate(self.search_space.config_space):
var_name = var_info['name']
var_domain = var_info['domain']
if self.init_method == 'random':
value = np.random.uniform(var_domain[0], var_domain[1])
elif self.init_method == 'latin':
value = temp[i][j] * (var_domain[1] - var_domain[0]) + var_domain[0]
sample[var_name] = value
samples.append(sample)
samples = self.inverse_transform(samples)
return samples
def model_reset(self):
self.obj_model = None
def get_fmin(self):
X = self.obj_model.dataset.inputs.numpy()
m, _ = self.predict(X)
return m.min()
    def reset(self, task_name: str, design_space: Dict, search_space: Union[None, Dict] = None):
        self.set_space(design_space, search_space)
self._X = np.empty((0,)) # Initializes an empty ndarray for input vectors
self._Y = np.empty((0,))
self._data_handler.reset_task(task_name, design_space)
self.sync_data(self._data_handler.get_input_vectors(), self._data_handler.get_output_value())
self.model_reset()
def model_manage_strategy(self):
self.ea.evaluator.eval(self.problem, self.pop)
pop_X = np.array([p.X for p in self.pop])
pop_F = np.array([p.F for p in self.pop])
if self.model_manage == 'best':
top_k_idx = sorted(range(len(pop_F)), key=lambda i: pop_F[i])[:self.k]
            elites = pop_X[top_k_idx]
elif self.model_manage == 'pre-select':
total_pop_X = pop_X
total_pop_F = pop_F
for i in range(self.k - 1):
pop = self.ea.ask()
self.ea.evaluator.eval(self.problem, pop)
pop_X = np.array([p.X for p in pop])
pop_F = np.array([p.F for p in pop])
total_pop_X = np.concatenate((total_pop_X, pop_X))
total_pop_F = np.concatenate((total_pop_F, pop_F))
top_k_idx = sorted(range(len(total_pop_F)), key=lambda i: total_pop_F[i])[:self.ini_num]
elites = total_pop_X[top_k_idx]
elif self.model_manage == 'generation':
for i in range(self.k - 1):
pop = self.ea.ask()
self.ea.evaluator.eval(self.problem, pop)
pop_X = np.array([p.X for p in pop])
top_k_idx = range(len(pop_X))
elites = pop_X
else:
raise ValueError(f"Invalid model manage strategy: {self.model_manage}")
self.elites_idx = top_k_idx
return elites
class EAProblem(Problem):
def __init__(self, space, predict):
input_dim = len(space)
xl = []
xu = []
for var_info in space:
var_domain = var_info['domain']
xl.append(var_domain[0])
xu.append(var_domain[1])
xl = np.array(xl)
xu = np.array(xu)
self.predict = predict
super().__init__(n_var=input_dim, n_obj=1, xl=xl, xu=xu)
def _evaluate(self, x, out, *args, **kwargs):
out["F"], _ = self.predict(x)
================================================
FILE: transopt/optimizer/SingleObjOptimizer/RGPEOptimizer.py
================================================
import numpy as np
import GPy
from transopt.utils.serialization import ndarray_to_vectors
from agent.registry import optimizer_register
from transopt.utils.Normalization import get_normalizer
from typing import Dict, Union, List, Tuple
from transopt.optimizer.optimizer_base import BOBase
from transopt.optimizer.model.rgpe import RGPE
from paramz import ObsAr
from GPy import util
from GPy.inference.latent_function_inference import expectation_propagation
@optimizer_register("RGPE")
class RGPEOptimizer(BOBase):
def __init__(self, config: Dict, **kwargs):
super(RGPEOptimizer, self).__init__(config=config)
self.init_method = 'Random'
if 'verbose' in config:
self.verbose = config['verbose']
else:
self.verbose = True
if 'init_number' in config:
self.ini_num = config['init_number']
else:
self.ini_num = None
if 'acf' in config:
self.acf = config['acf']
else:
self.acf = 'EI'
def initial_sample(self):
return self.random_sample(self.ini_num)
def random_sample(self, num_samples: int) -> List[Dict]:
"""
Initialize random samples.
:param num_samples: Number of random samples to generate
:return: List of dictionaries, each representing a random sample
"""
if self.input_dim is None:
raise ValueError("Input dimension is not set. Call set_search_space() to set the input dimension.")
random_samples = []
for _ in range(num_samples):
sample = {}
for var_info in self.search_space.config_space:
var_name = var_info['name']
var_domain = var_info['domain']
# Generate a random floating-point number within the specified range
random_value = np.random.uniform(var_domain[0], var_domain[1])
sample[var_name] = random_value
random_samples.append(sample)
random_samples = self.inverse_transform(random_samples)
return random_samples
def model_reset(self):
if self.obj_model is None:
self.obj_model = RGPE(n_features=self.input_dim)
if self.obj_model.target_model is not None:
self.meta_update()
if self._X.size != 0:
self.obj_model.fit({'X':self._X, 'Y':self._Y})
def meta_update(self):
self.obj_model.meta_update()
def suggest(self, n_suggestions:Union[None, int] = None) ->List[Dict]:
if self._X.size == 0:
suggests = self.initial_sample()
return suggests
elif self._X.shape[0] < self.ini_num:
pass
else:
if 'normalize' in self.config:
self.normalizer = get_normalizer(self.config['normalize'])
Data = {}
Data['Target'] = {'X':self._X, 'Y':self._Y}
self.update_model(Data)
suggested_sample, acq_value = self.evaluator.compute_batch(None, context_manager=None)
suggested_sample = self.search_space.zip_inputs(suggested_sample)
suggested_sample = ndarray_to_vectors(self._get_var_name('search'), suggested_sample)
design_suggested_sample = self.inverse_transform(suggested_sample)
return design_suggested_sample
def create_model(self, model_name, Source_data, Target_data):
self.model_name = model_name
source_num = len(Source_data['Y'])
self.output_dim = source_num + 1
##Meta Date
meta_data = {}
for i in range(source_num):
meta_data[i] = TaskData(X=Source_data['X'][i], Y=Source_data['Y'][i])
###Construct objective model
if self.model_name == 'RGPE':
self.obj_model = get_model('RGPE', self.model_space)
self.obj_model.meta_fit(meta_data)
self.obj_model.fit(TaskData(Target_data['X'], Target_data['Y']), optimize=True)
elif self.model_name == 'SGPT_POE':
self.obj_model = SGPT_POE(n_features=self.Xdim, beta=1)
self.obj_model.meta_fit(meta_data)
self.obj_model.fit(TaskData(Target_data['X'], Target_data['Y']), optimize=True)
elif self.model_name == 'SGPT_M':
self.obj_model = SGPT_M(n_features=self.Xdim)
self.obj_model.meta_fit(meta_data)
self.obj_model.fit(TaskData(Target_data['X'], Target_data['Y']), optimize=True)
else:
            # Only an RBF kernel is currently supported; both former branches
            # built the same kernel regardless of self.kernel.
            kern = GPy.kern.RBF(self.Xdim, ARD=True)
X = Target_data['X']
Y = Target_data['Y']
self.obj_model = GPy.models.GPRegression(X, Y, kernel=kern)
self.obj_model['Gaussian_noise.*variance'].constrain_bounded(1e-9, 1e-3)
try:
self.obj_model.optimize_restarts(messages=True, num_restarts=1, verbose=self.verbose)
            except np.linalg.LinAlgError:
                print('Error: np.linalg.LinAlgError')
def updateModel(self, Target_data):
## Train target model
if self.model_name == 'RGPE' or \
self.model_name == 'SGPT_POE' or self.model_name == 'SGPT_M':
self.obj_model.fit(TaskData(Target_data['X'], Target_data['Y']), optimize=True)
else:
X = Target_data['X']
Y = Target_data['Y']
self.obj_model.set_XY(X, Y)
try:
self.obj_model.optimize_restarts(messages=True, num_restarts=1,
verbose=self.verbose)
            except np.linalg.LinAlgError:
                print('Error: np.linalg.LinAlgError')
return None
def reset_target(self):
self.obj_model.reset_target()
def meta_add(self, meta_data):
self.obj_model.meta_add(meta_data)
def resetModel(self, Source_data, Target_data):
## Train target model
pass
def get_train_time(self):
return self.fit_time
def get_fit_time(self):
return self.acf_time
def predict(
self, X, return_full: bool = False, with_noise: bool = False
) -> Tuple[np.ndarray, np.ndarray]:
# returned mean: sum of means of the predictions of all source and target GPs
mu, var = self.obj_model.predict(X, return_full=return_full)
return mu, var
def obj_posterior_samples(self, X, sample_size):
if X.ndim == 1:
X = X[None,:]
task_id = self.output_dim - 1
if self.model_name == 'SGPT_POE' or self.model_name == 'SGPT_M' or\
self.model_name == 'RGPE':
samples_obj = self.posterior_samples(X, model_id=0,size=sample_size)
else:
raise NameError
return samples_obj
    def update_model(self, Data: Dict):
        ## Train target model
        if self.obj_model is None:
            self.create_model(Data['Target'])
        else:
            self.obj_model.set_XY(Data['Target'])
## Train target model
self.obj_model.fit(Data['Target'], optimize=True)
def get_fmin(self):
"Get the minimum of the current model."
m, v = self.predict(self.obj_model.X)
return m.min()
def set_XY(self, X=None, Y=None):
if isinstance(X, list):
X, _, self.obj_model.output_index = util.multioutput.build_XY(X, None)
if isinstance(Y, list):
_, Y, self.obj_model.output_index = util.multioutput.build_XY(Y, Y)
self.obj_model.update_model(False)
if Y is not None:
self.obj_model.Y = ObsAr(Y)
self.obj_model.Y_normalized = self.obj_model.Y
if X is not None:
self.obj_model.X = ObsAr(X)
self.obj_model.Y_metadata = {'output_index': self.obj_model.output_index, 'trials': np.ones(self.obj_model.output_index.shape)}
if isinstance(self.obj_model.inference_method, expectation_propagation.EP):
self.obj_model.inference_method.reset()
self.obj_model.update_model(True)
def samples(self, gp):
"""
Returns a set of samples of observations based on a given value of the latent variable.
:param gp: latent variable
"""
        orig_shape = gp.shape
        gp = gp.flatten()
Ysim = np.array([np.random.normal(gpj, scale=np.sqrt(1e-2), size=1) for gpj in gp])
return Ysim.reshape(orig_shape)
def posterior_samples_f(self,X, model_id, size=10):
"""
Samples the posterior GP at the points X.
:param X: The points at which to take the samples.
:type X: np.ndarray (Nnew x self.input_dim)
:param size: the number of a posteriori samples.
:type size: int.
:returns: set of simulations
:rtype: np.ndarray (Nnew x D x samples)
"""
m, v = self.obj_model.predict(X, return_full=True)
def sim_one_dim(m, v):
return np.random.multivariate_normal(m, v, size).T
return sim_one_dim(m.flatten(), v)[:, np.newaxis, :]
def posterior_samples(self, X, model_id, size=10):
"""
Samples the posterior GP at the points X.
:param X: the points at which to take the samples.
:type X: np.ndarray (Nnew x self.input_dim.)
:param size: the number of a posteriori samples.
:type size: int.
:param noise_model: for mixed noise likelihood, the noise model to use in the samples.
:type noise_model: integer.
:returns: Ysim: set of simulations,
:rtype: np.ndarray (D x N x samples) (if D==1 we flatten out the first dimension)
"""
fsim = self.posterior_samples_f(X, model_id=model_id, size=size)
if fsim.ndim == 3:
for d in range(fsim.shape[1]):
fsim[:, d] = self.samples(fsim[:, d])
else:
fsim = self.samples(fsim)
return fsim
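RGPE (ranking-weighted Gaussian process ensemble) predicts with a weighted combination of source and target GPs. The ranking-based weight computation itself lives in `transopt.optimizer.model.rgpe`; the sketch below only shows the standard mean/variance combination for a fixed, illustrative weight vector:

```python
import numpy as np

def ensemble_predict(means, variances, weights):
    """Weighted combination of base-model predictions, RGPE-style.

    means, variances: lists of (N,) arrays, one per base model.
    weights: nonnegative weights summing to 1. For a weighted sum of
    independent Gaussians, means combine with w and variances with w**2.
    """
    w = np.asarray(weights)[:, None]
    mu = (w * np.asarray(means)).sum(axis=0)
    var = (w**2 * np.asarray(variances)).sum(axis=0)
    return mu, var

means = [np.array([1.0, 2.0]), np.array([3.0, 4.0])]       # source, target
variances = [np.array([0.1, 0.1]), np.array([0.2, 0.2])]
mu, var = ensemble_predict(means, variances, [0.25, 0.75])
print(mu.tolist())  # [2.5, 3.5]
```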
================================================
FILE: transopt/optimizer/SingleObjOptimizer/TPEOptimizer.py
================================================
import numpy as np
from typing import Dict, List, Union
from transopt.optimizer.optimizer_base import BOBase
from transopt.utils.serialization import ndarray_to_vectors
from agent.registry import optimizer_register
from transopt.utils.Normalization import get_normalizer
@optimizer_register('TPE')
class TPEOptimizer(BOBase):
def __init__(self, config:Dict, **kwargs):
super(TPEOptimizer, self).__init__(config=config)
self.init_method = 'Random'
self.model = None
if 'verbose' in config:
self.verbose = config['verbose']
else:
self.verbose = True
if 'init_number' in config:
self.ini_num = config['init_number']
else:
self.ini_num = None
if 'acf' in config:
self.acf = config['acf']
else:
self.acf = 'EI'
self.obj_model = None
def initial_sample(self):
return self.random_sample(self.ini_num)
def suggest(self, n_suggestions:Union[None, int] = None) ->List[Dict]:
if self._X.size == 0:
suggests = self.initial_sample()
return suggests
elif self._X.shape[0] < self.ini_num:
pass
else:
if 'normalize' in self.config:
self.normalizer = get_normalizer(self.config['normalize'])
Data = {'Target':{'X':self._X, 'Y':self._Y}}
self.update_model(Data)
suggested_sample, acq_value = self.evaluator.compute_batch(None, context_manager=None)
suggested_sample = self.search_space.zip_inputs(suggested_sample)
suggested_sample = ndarray_to_vectors(self._get_var_name('search'), suggested_sample)
design_suggested_sample = self.inverse_transform(suggested_sample)
return design_suggested_sample
def update_model(self, Data):
assert 'Target' in Data
target_data = Data['Target']
X = target_data['X']
Y = target_data['Y']
if self.normalizer is not None:
Y = self.normalizer(Y)
        if self.obj_model is None:
self.create_model(X, Y)
else:
self.obj_model.set_XY(X, Y)
try:
self.obj_model.optimize_restarts(num_restarts=1, verbose=self.verbose, robust=True)
        except np.linalg.LinAlgError:
            print('Error: np.linalg.LinAlgError')
    def create_model(self, X, Y):
        # FIXME: this instantiates the optimizer itself (and without its
        # required config) rather than a TPE density model.
        self.obj_model = TPEOptimizer()
def predict(self, X):
"""
Predictions with the model. Returns posterior means and standard deviations at X. Note that this is different in GPy where the variances are given.
Parameters:
X (np.ndarray) - points to run the prediction for.
with_noise (bool) - whether to add noise to the prediction. Default is True.
"""
if X.ndim == 1:
X = X[None,:]
m, v = self.obj_model.predict(X)
        # v holds the (diagonal) predictive variances; no square root is taken here.
return m, v
def random_sample(self, num_samples: int) -> List[Dict]:
"""
Initialize random samples.
:param num_samples: Number of random samples to generate
:return: List of dictionaries, each representing a random sample
"""
if self.input_dim is None:
raise ValueError("Input dimension is not set. Call set_search_space() to set the input dimension.")
random_samples = []
for _ in range(num_samples):
sample = {}
for var_info in self.search_space.config_space:
var_name = var_info['name']
var_domain = var_info['domain']
# Generate a random floating-point number within the specified range
random_value = np.random.uniform(var_domain[0], var_domain[1])
sample[var_name] = random_value
random_samples.append(sample)
random_samples = self.inverse_transform(random_samples)
return random_samples
def model_reset(self):
self.obj_model = None
def get_fmin(self):
"Get the minimum of the current model."
m, v = self.predict(self.obj_model.X)
return m.min()
def posterior_samples(self, X, model_id, size=10):
"""
Samples the posterior GP at the points X.
:param X: the points at which to take the samples.
:type X: np.ndarray (Nnew x self.input_dim.)
:param size: the number of a posteriori samples.
:type size: int.
:param noise_model: for mixed noise likelihood, the noise model to use in the samples.
:type noise_model: integer.
:returns: Ysim: set of simulations,
:rtype: np.ndarray (D x N x samples) (if D==1 we flatten out the first dimension)
"""
fsim = self.posterior_samples_f(X, model_id=model_id, size=size)
if fsim.ndim == 3:
for d in range(fsim.shape[1]):
fsim[:, d] = self.samples(fsim[:, d])
else:
fsim = self.samples(fsim)
return fsim
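The sampling path above (draw latent function samples from the posterior, then add observation noise) can be sketched in isolation. The fixed 1e-2 noise variance mirrors the `samples` helper used here; the toy mean and covariance below are invented for illustration:

```python
import numpy as np

def sample_posterior(mean, cov, size=10, noise_var=1e-2, seed=None):
    """Draw `size` posterior function samples and add Gaussian observation noise."""
    rng = np.random.default_rng(seed)
    # Latent function draws: shape (N, size), one column per sample
    fsim = rng.multivariate_normal(mean, cov, size=size).T
    # Observation noise with the same fixed 1e-2 variance as above
    return fsim + rng.normal(scale=np.sqrt(noise_var), size=fsim.shape)

mean = np.zeros(5)
cov = np.eye(5) * 0.25
samples = sample_posterior(mean, cov, size=100, seed=0)
print(samples.shape)  # (5, 100)
```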
================================================
FILE: transopt/optimizer/SingleObjOptimizer/VizerOptimizer.py
================================================
import numpy as np
from transopt.utils.serialization import ndarray_to_vectors
from transopt.agent.registry import optimizer_register
from transopt.utils.Normalization import get_normalizer
from transopt.optimizer.model.MHGP import MHGP
from typing import Dict, Union, List, Tuple
from transopt.optimizer.optimizer_base import BOBase
@optimizer_register('vizer')
class Vizer(BOBase):
def __init__(self, config: Dict, **kwargs):
super(Vizer, self).__init__(config=config)
self.init_method = 'Random'
self.verbose = config.get('verbose', True)
self.ini_num = config.get('init_number')
self.acf = config.get('acf', 'EI')
def model_reset(self):
if self.obj_model is None:
self.obj_model = MHGP(n_features=self.input_dim)
# If a target GP already exists, refresh the meta-model before refitting
if self.obj_model.target_gp is not None:
self.meta_update()
self.obj_model.fit({'X': self._X, 'Y': self._Y})
elif self._X.size != 0:
self.obj_model.fit({'X': self._X, 'Y': self._Y})
def initial_sample(self):
return self.random_sample(self.ini_num)
def random_sample(self, num_samples: int) -> List[Dict]:
"""
Initialize random samples.
:param num_samples: Number of random samples to generate
:return: List of dictionaries, each representing a random sample
"""
if self.input_dim is None:
raise ValueError("Input dimension is not set. Call set_search_space() to set the input dimension.")
random_samples = []
for _ in range(num_samples):
sample = {}
for var_info in self.search_space.config_space:
var_name = var_info['name']
var_domain = var_info['domain']
# Generate a random floating-point number within the specified range
random_value = np.random.uniform(var_domain[0], var_domain[1])
sample[var_name] = random_value
random_samples.append(sample)
random_samples = self.inverse_transform(random_samples)
return random_samples
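The loop above draws each variable independently from a uniform distribution over its domain. A standalone sketch of the same pattern (the list-of-dicts `config_space` layout mirrors the code above; the variable names and bounds are invented):

```python
import numpy as np

def random_samples(config_space, num_samples, seed=None):
    """Uniformly sample each variable within its domain, one dict per sample."""
    rng = np.random.default_rng(seed)
    samples = []
    for _ in range(num_samples):
        # One independent uniform draw per variable
        samples.append({v['name']: rng.uniform(*v['domain']) for v in config_space})
    return samples

space = [{'name': 'x1', 'domain': (-5.0, 5.0)},
         {'name': 'x2', 'domain': (0.0, 1.0)}]
batch = random_samples(space, 3, seed=42)
```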
def suggest(self, n_suggestions:Union[None, int] = None) ->List[Dict]:
if self._X.size == 0:
suggests = self.initial_sample()
return suggests
elif self._X.shape[0] < self.ini_num:
# Keep filling the initial design with random samples
return self.random_sample(self.ini_num - self._X.shape[0])
else:
if 'normalize' in self.config:
self.normalizer = get_normalizer(self.config['normalize'])
Data = {}
Data['Target'] = {'X':self._X, 'Y':self._Y}
self.update_model(Data)
suggested_sample, acq_value = self.evaluator.compute_batch(None, context_manager=None)
suggested_sample = self.search_space.zip_inputs(suggested_sample)
suggested_sample = ndarray_to_vectors(self._get_var_name('search'), suggested_sample)
design_suggested_sample = self.inverse_transform(suggested_sample)
return design_suggested_sample
def meta_update(self):
self.obj_model.meta_update()
def meta_add(self, Data:List[Dict]):
self.obj_model.meta_add(Data)
def create_model(self):
self.obj_model = MHGP(self.input_dim)
def update_model(self, Data):
## Train target model
# Create the target model on first use, otherwise refresh its data
if self.obj_model is None:
self.create_model()
else:
self.obj_model.set_XY(Data['Target'])
self.obj_model.fit(Data['Target'], optimize=True)
def MetaFitModel(self, metadata):
if self.model_name in ('SHGP', 'MHGP', 'BHGP'):
self.obj_model.meta_fit(metadata)
def get_train_time(self):
return self.fit_time
def get_fit_time(self):
return self.acf_time
def predict(
self, X, return_full: bool = False, with_noise: bool = False
) -> Tuple[np.ndarray, np.ndarray]:
# returned mean: sum of means of the predictions of all source and target GPs
mu, var = self.obj_model.predict(X, return_full=return_full, with_noise=with_noise)
return mu, var
def obj_posterior_samples(self, X, sample_size):
if X.ndim == 1:
X = X[None,:]
task_id = self.output_dim - 1
if self.model_name in ('SHGP', 'HGP', 'MHGP', 'BHGP', 'RPGE'):
samples_obj = self.posterior_samples(X, model_id=0, size=sample_size)
elif self.model_name == 'MOGP':
noise_dict = {'output_index': np.array([task_id] * X.shape[0])[:, np.newaxis].astype(int)}
X_zip = np.hstack((X, noise_dict['output_index']))
samples_obj = self.obj_model.posterior_samples(X_zip, size=sample_size, Y_metadata=noise_dict) # grid * 1 * sample_num
else:
raise NameError(f"Unknown model name: {self.model_name}")
return samples_obj
def get_fmin(self):
"Get the minimum of the current model."
m, v = self.predict(self.obj_model.X)
return m.min()
def samples(self, gp):
"""
Returns a set of noisy observation samples for the given latent function values.
:param gp: latent function values
"""
orig_shape = gp.shape
gp = gp.flatten()
# Add i.i.d. Gaussian observation noise with variance 1e-2
Ysim = np.random.normal(gp, scale=np.sqrt(1e-2))
return Ysim.reshape(orig_shape)
def posterior_samples_f(self,X, model_id, size=10):
"""
Samples the posterior GP at the points X.
:param X: The points at which to take the samples.
:type X: np.ndarray (Nnew x self.input_dim)
:param size: the number of a posteriori samples.
:type size: int.
:returns: set of simulations
:rtype: np.ndarray (Nnew x D x samples)
"""
m, v = self.obj_model.predict(X, return_full=True)
def sim_one_dim(m, v):
return np.random.multivariate_normal(m, v, size).T
return sim_one_dim(m.flatten(), v)[:, np.newaxis, :]
def posterior_samples(self, X, model_id, size=10):
"""
Samples the posterior GP at the points X.
:param X: the points at which to take the samples.
:type X: np.ndarray (Nnew x self.input_dim.)
:param size: the number of a posteriori samples.
:type size: int.
:param noise_model: for mixed noise likelihood, the noise model to use in the samples.
:type noise_model: integer.
:returns: Ysim: set of simulations,
:rtype: np.ndarray (D x N x samples) (if D==1 we flatten out the first dimension)
"""
fsim = self.posterior_samples_f(X, model_id=model_id, size=size)
if fsim.ndim == 3:
for d in range(fsim.shape[1]):
fsim[:, d] = self.samples(fsim[:, d])
else:
fsim = self.samples(fsim)
return fsim
================================================
FILE: transopt/optimizer/SingleObjOptimizer/__init__.py
================================================
from transopt.optimizer.SingleObjOptimizer.KrigingOptimizer import KrigingGA
from transopt.optimizer.SingleObjOptimizer.LFL import LFLOptimizer
from transopt.optimizer.SingleObjOptimizer.MetaLearningOptimizer import MetaBOOptimizer
from transopt.optimizer.SingleObjOptimizer.MultitaskOptimizer import MultitaskBO
from transopt.optimizer.SingleObjOptimizer.RGPEOptimizer import RGPEOptimizer
from transopt.optimizer.SingleObjOptimizer.TPEOptimizer import TPEOptimizer
from transopt.optimizer.SingleObjOptimizer.TLBO import VanillaBO
from transopt.optimizer.SingleObjOptimizer.VizerOptimizer import Vizer
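The imports above rely on each optimizer registering itself by name (e.g. `@optimizer_register('vizer')`). A minimal sketch of such a decorator registry, with invented class names (not the actual `transopt.agent.registry` implementation):

```python
class Registry:
    """Minimal name -> class registry, in the spirit of optimizer_register above."""
    def __init__(self):
        self._entries = {}

    def register(self, name):
        # Returns a decorator that records the class under `name`
        def deco(cls):
            self._entries[name] = cls
            return cls
        return deco

    def get(self, name):
        return self._entries.get(name)

optimizer_registry = Registry()

@optimizer_registry.register('vizer')
class Vizer:
    pass
```

Lookup by name then recovers the class, so `get_acf`-style factories can instantiate it from a config string.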
================================================
FILE: transopt/optimizer/__init__.py
================================================
# from transopt.optimizer.model.get_model import get_model
# from transopt.optimizer.sampler.get_sampler import get_sampler
# from transopt.optimizer.refiner.get_refiner import get_refiner
# from transopt.optimizer.pretrain.get_pretrain import get_pretrain
# from transopt.optimizer.acquisition_function import get_acf
================================================
FILE: transopt/optimizer/acquisition_function/ConformalLCB.py
================================================
# Copyright (c) 2016, the GPyOpt Authors
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from GPyOpt.acquisitions.base import AcquisitionBase
from transopt.agent.registry import acf_registry
@acf_registry.register('ConformalLCB')
class ConformalLCB(AcquisitionBase):
"""
GP-Lower Confidence Bound acquisition function with constant exploration weight.
See:
Gaussian Process Optimization in the Bandit Setting: No Regret and Experimental Design
Srinivas et al., Proc. International Conference on Machine Learning (ICML), 2010
:param model: GPyOpt class of model
:param space: GPyOpt class of domain
:param optimizer: optimizer of the acquisition. Should be a GPyOpt optimizer
:param cost_withGradients: function
:param jitter: positive value to make the acquisition more explorative
.. Note:: does not allow to be used with cost
"""
analytical_gradient_prediction = False
def __init__(self, model, space, optimizer, config):
self.optimizer = optimizer
super(ConformalLCB, self).__init__(model, space, optimizer)
if 'exploration_weight' in config:
self.exploration_weight = config['exploration_weight']
else:
self.exploration_weight = 1
def _compute_acq(self, x):
"""
Computes the GP-Lower Confidence Bound
"""
if self.model.qhats is not None and self.model.model_name == 'MOGP':
m, s = self.model.conformal_prediction(x)
else:
m, s = self.model.predict(x)
f_acqu = -m + self.exploration_weight * s
return f_acqu
def _compute_acq_withGradients(self, x):
"""
Computes the GP-Lower Confidence Bound and its derivative
"""
m, s, dmdx, dsdx = self.model.predict_withGradients(x)
f_acqu = -m + self.exploration_weight * s
df_acqu = -dmdx + self.exploration_weight * dsdx
return f_acqu, df_acqu
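When calibrated `qhats` are available, `ConformalLCB` replaces the GP's predictive spread with a conformal interval. The model's `conformal_prediction` is not shown here, but the split-conformal recipe behind such intervals can be sketched: take the ceil((n+1)(1-alpha))/n empirical quantile of held-out absolute residuals and use it as the interval half-width. All names below are illustrative:

```python
import numpy as np

def conformal_halfwidth(residuals, alpha=0.1):
    """Split-conformal half-width: the ceil((n+1)(1-alpha))/n empirical
    quantile of absolute calibration residuals."""
    n = len(residuals)
    q_level = min(np.ceil((n + 1) * (1 - alpha)) / n, 1.0)
    return np.quantile(np.abs(residuals), q_level)

rng = np.random.default_rng(0)
resid = rng.normal(scale=0.5, size=200)  # invented calibration residuals
qhat = conformal_halfwidth(resid, alpha=0.1)
# Intervals mean +/- qhat then cover roughly 90% of similarly distributed points
```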
================================================
FILE: transopt/optimizer/acquisition_function/__init__.py
================================================
from transopt.optimizer.acquisition_function.sequential import Sequential
from transopt.optimizer.acquisition_function.ei import AcquisitionEI
from transopt.optimizer.acquisition_function.lcb import AcquisitionLCB
from transopt.optimizer.acquisition_function.pi import AcquisitionPI
from transopt.optimizer.acquisition_function.taf import AcquisitionTAF
# from transopt.optimizer.acquisition_function.SMSEGO import SMSEGO
# from transopt.optimizer.acquisition_function.MOEADEGO import MOEADEGO
# from transopt.optimizer.acquisition_function.CauMOACF import CauMOACF
from transopt.optimizer.acquisition_function.model_manage.GABest import GABest
from transopt.optimizer.acquisition_function.model_manage.GAPreSelect import GAPreSelect
from transopt.optimizer.acquisition_function.model_manage.GAGeneration import GAGeneration
from transopt.optimizer.acquisition_function.model_manage.DEBest import DEBest
from transopt.optimizer.acquisition_function.model_manage.DEPreSelect import DEPreSelect
from transopt.optimizer.acquisition_function.model_manage.DEGeneration import DEGeneration
from transopt.optimizer.acquisition_function.model_manage.PSOBest import PSOBest
from transopt.optimizer.acquisition_function.model_manage.PSOPreSelect import PSOPreSelect
from transopt.optimizer.acquisition_function.model_manage.PSOGeneration import PSOGeneration
from transopt.optimizer.acquisition_function.model_manage.CMAESBest import CMAESBest
from transopt.optimizer.acquisition_function.model_manage.CMAESPreSelect import CMAESPreSelect
from transopt.optimizer.acquisition_function.model_manage.CMAESGeneration import CMAESGeneration
================================================
FILE: transopt/optimizer/acquisition_function/acf_base.py
================================================
# Copyright (c) 2016, the GPyOpt Authors
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import scipy
from GPyOpt import Design_space
from GPyOpt.core.task.cost import constant_cost_withGradients
from GPyOpt.optimization.acquisition_optimizer import AcquisitionOptimizer
from GPyOpt.util import epmgp
class AcquisitionBase(object):
"""
Base class for acquisition functions in Bayesian Optimization
:param model: GPyOpt class of model
:param space: GPyOpt class of domain
:param optimizer: optimizer of the acquisition. Should be a GPyOpt optimizer
"""
analytical_gradient_prediction = False
def __init__(self, cost_withGradients=None, **kwargs):
# The gradient availability flag is finalized in link_model(), once a model is attached
self.analytical_gradient_acq = False
self.optimizer_name = kwargs.get('optimizer_name', 'lbfgs')
if cost_withGradients is None:
self.cost_withGradients = constant_cost_withGradients
else:
self.cost_withGradients = cost_withGradients
@staticmethod
def fromDict(model, space, optimizer, cost_withGradients, config):
raise NotImplementedError()
def link(self, model, space):
self.link_model(model=model)
self.link_space(space=space)
def link_model(self, model):
self.model = model
# Analytical gradients are usable only if both the acquisition and the model provide them
self.analytical_gradient_acq = self.analytical_gradient_prediction and model.analytical_gradient_prediction
def link_space(self, space):
opt_space = []
for var_name in space.variables_order:
var_dic = {
'name': var_name,
'type': 'continuous',
'domain': space[var_name].search_space_range,
}
if space[var_name].type == 'categorical':
var_dic['type'] = 'discrete'
opt_space.append(var_dic.copy())
self.space = Design_space(opt_space)
self.optimizer = AcquisitionOptimizer(self.space, self.optimizer_name)
def acquisition_function(self,x):
"""
Takes an acquisition and weights it so the domain and cost are taken into account.
"""
f_acqu = self._compute_acq(x)
cost_x, _ = self.cost_withGradients(x)
x_z = x if self.space.model_dimensionality == self.space.objective_dimensionality else self.space.zip_inputs(x)
return -(f_acqu*self.space.indicator_constraints(x_z))/cost_x
def acquisition_function_withGradients(self, x):
"""
Takes an acquisition and it gradient and weights it so the domain and cost are taken into account.
"""
f_acqu,df_acqu = self._compute_acq_withGradients(x)
cost_x, cost_grad_x = self.cost_withGradients(x)
f_acq_cost = f_acqu/cost_x
df_acq_cost = (df_acqu*cost_x - f_acqu*cost_grad_x)/(cost_x**2)
x_z = x if self.space.model_dimensionality == self.space.objective_dimensionality else self.space.zip_inputs(x)
return -f_acq_cost*self.space.indicator_constraints(x_z), -df_acq_cost*self.space.indicator_constraints(x_z)
def optimize(self, duplicate_manager=None):
"""
Optimizes the acquisition function (uses a flag from the model to use gradients or not).
"""
if not self.analytical_gradient_acq:
out = self.optimizer.optimize(f=self.acquisition_function, duplicate_manager=duplicate_manager)
else:
out = self.optimizer.optimize(f=self.acquisition_function, f_df=self.acquisition_function_withGradients, duplicate_manager=duplicate_manager)
return out
def _compute_acq(self,x):
raise NotImplementedError('')
def _compute_acq_withGradients(self, x):
raise NotImplementedError('')
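The cost-weighted gradient in `acquisition_function_withGradients` is the quotient rule, d(f/c) = (f'c - f c')/c². A toy numeric check against central finite differences (scalar stand-ins for illustration, not the real acquisition or cost):

```python
import math

def f(x):   # toy acquisition value
    return math.sin(x)

def c(x):   # toy evaluation cost, strictly positive
    return 1.0 + x ** 2

def df(x):
    return math.cos(x)

def dc(x):
    return 2.0 * x

x = 0.7
# Quotient rule, matching df_acq_cost = (df*c - f*dc) / c**2 above
analytic = (df(x) * c(x) - f(x) * dc(x)) / c(x) ** 2
h = 1e-6
numeric = (f(x + h) / c(x + h) - f(x - h) / c(x - h)) / (2 * h)
```

The two values agree to well below 1e-6, confirming the gradient formula.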
================================================
FILE: transopt/optimizer/acquisition_function/ei.py
================================================
from GPyOpt.core.task.cost import constant_cost_withGradients
from GPyOpt.util.general import get_quantiles
from transopt.agent.registry import acf_registry
from transopt.optimizer.acquisition_function.acf_base import AcquisitionBase
@acf_registry.register('EI')
class AcquisitionEI(AcquisitionBase):
"""
General template to create a new GPyOPt acquisition function
:param model: GPyOpt class of model
:param space: GPyOpt class of domain
:param optimizer: optimizer of the acquisition. Should be a GPyOpt optimizer
:param cost_withGradients: function that provides the evaluation cost and its gradients
"""
# --- Set this line to true if analytical gradients are available
analytical_gradient_prediction = False
def __init__(self, config):
super(AcquisitionEI, self).__init__()
if 'jitter' in config:
self.jitter = config['jitter']
else:
self.jitter = 0.01
if 'threshold' in config:
self.threshold = config['threshold']
else:
self.threshold = 0
self.cost_withGradients = constant_cost_withGradients
def _compute_acq(self, x):
m, s = self.model.predict(x)
fmin = self.model.get_fmin()
phi, Phi, u = get_quantiles(self.jitter, fmin, m, s)
f_acqu_ei = s * (u * Phi + phi)
return f_acqu_ei
def _compute_acq_withGradients(self, x):
# --- DEFINE YOUR ACQUISITION (TO BE MAXIMIZED) AND ITS GRADIENT HERE
#
# Compute here the value of the new acquisition function. Remember that x is a 2D numpy array
# with a point in the domain in each row. f_acqu_x should be a column vector containing the
# values of the acquisition at x. df_acqu_x contains in each row the values of the gradient of the
# acquisition at each point of x.
#
# NOTE: this function is optional. If not available, the gradients will be approximated numerically.
raise NotImplementedError()
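`get_quantiles` returns phi, Phi, and u = (fmin - m - jitter)/s, so `_compute_acq` above evaluates the closed-form EI, s·(u·Φ(u) + φ(u)). The same quantity with only the standard library (a sketch, not the GPyOpt internals):

```python
import math

def expected_improvement(m, s, fmin, jitter=0.01):
    """EI for minimization under Y ~ N(m, s^2): s * (u*Phi(u) + phi(u)),
    with u = (fmin - m - jitter) / s."""
    u = (fmin - m - jitter) / s
    phi = math.exp(-0.5 * u * u) / math.sqrt(2 * math.pi)     # standard normal pdf
    Phi = 0.5 * (1.0 + math.erf(u / math.sqrt(2)))            # standard normal cdf
    return s * (u * Phi + phi)

ei = expected_improvement(m=0.0, s=1.0, fmin=0.01, jitter=0.01)
# Here u = 0, so EI reduces to s * pdf(0) = 1/sqrt(2*pi) ~ 0.3989
```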
================================================
FILE: transopt/optimizer/acquisition_function/get_acf.py
================================================
from transopt.agent.registry import acf_registry
def get_acf(acf_name, **kwargs):
"""Create the acquisition function object."""
acf_class = acf_registry.get(acf_name)
if acf_class is None:
raise NameError(f"ACF '{acf_name}' not found in the registry.")
return acf_class(config=kwargs)
# def get_acf(acf_name, model, search_space, config, tabular=False):
# """Create the optimizer object."""
# acf_class = get_acf.get(acf_name)
# acquisition_optimizer = GPyOpt.optimization.AcquisitionOptimizer(search_space)
# if acf_class is not None:
# acquisition = acf_class(model=model, optimizer=acquisition_optimizer, space=search_space, config=config)
# else:
#         # Handle the case where the acquisition name is not in the registry
# print(f"Acquisition '{acf_name}' not found in the registry.")
# raise NameError
# return acquisition
================================================
FILE: transopt/optimizer/acquisition_function/lcb.py
================================================
# Copyright (c) 2016, the GPyOpt Authors
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from transopt.agent.registry import acf_registry
from transopt.optimizer.acquisition_function.acf_base import AcquisitionBase
@acf_registry.register('LCB')
class AcquisitionLCB(AcquisitionBase):
"""
GP-Lower Confidence Bound acquisition function with constant exploration weight.
See:
Gaussian Process Optimization in the Bandit Setting: No Regret and Experimental Design
Srinivas et al., Proc. International Conference on Machine Learning (ICML), 2010
:param model: GPyOpt class of model
:param space: GPyOpt class of domain
:param optimizer: optimizer of the acquisition. Should be a GPyOpt optimizer
:param cost_withGradients: function
:param jitter: positive value to make the acquisition more explorative
.. Note:: does not allow to be used with cost
"""
analytical_gradient_prediction = False
def __init__(self, config):
super(AcquisitionLCB, self).__init__()
if 'exploration_weight' in config:
self.exploration_weight = config['exploration_weight']
else:
self.exploration_weight = 1
def _compute_acq(self, x):
"""
Computes the GP-Lower Confidence Bound
"""
m, s = self.model.predict(x)
f_acqu = -m + self.exploration_weight * s
return f_acqu
def _compute_acq_withGradients(self, x):
"""
Computes the GP-Lower Confidence Bound and its derivative
"""
m, s, dmdx, dsdx = self.model.predict_withGradients(x)
f_acqu = -m + self.exploration_weight * s
df_acqu = -dmdx + self.exploration_weight * dsdx
return f_acqu, df_acqu
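The LCB score above is -m + w·s: low predicted mean and high predictive spread both raise the acquisition. A minimal standalone version with invented toy inputs:

```python
import numpy as np

def lcb_acquisition(m, s, exploration_weight=1.0):
    """Negated GP-LCB as in _compute_acq above: maximize -m + w*s."""
    return -m + exploration_weight * s

m = np.array([0.2, -0.1, 0.5])   # toy posterior means
s = np.array([0.05, 0.30, 0.90]) # toy posterior standard deviations
acq = lcb_acquisition(m, s, exploration_weight=2.0)
best = int(np.argmax(acq))
# The third point wins: its high uncertainty outweighs its high mean
```

Larger `exploration_weight` shifts the winner toward uncertain points; with weight 0 the rule degenerates to pure exploitation of the mean.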
================================================
FILE: transopt/optimizer/acquisition_function/model_manage/CMAESBest.py
================================================
import math
import numpy as np
from GPyOpt import Design_space
from pymoo.algorithms.soo.nonconvex.cmaes import CMAES
from pymoo.core.problem import Problem
from transopt.agent.registry import acf_registry
from transopt.optimizer.acquisition_function.acf_base import AcquisitionBase
@acf_registry.register('CMAES-Best')
class CMAESBest(AcquisitionBase):
analytical_gradient_prediction = False
def __init__(self, config):
super(CMAESBest, self).__init__()
config_dict = {}
if config != "":
if ',' in config:
key_value_pairs = config.split(',')
else:
key_value_pairs = [config]
for pair in key_value_pairs:
key, value = pair.split(':')
config_dict[key.strip()] = value.strip()
if 'k' in config_dict:
self.k = int(config_dict['k'])
else:
self.k = 2
if 'n' in config_dict:
self.pop_size = 4 + math.floor(3 * math.log(int(config_dict['n'])))
else:
self.pop_size = 10
self.model = None
self.ea = None
self.problem = None
def link_space(self, space):
opt_space = []
for var_name in space.variables_order:
var_dic = {
'name': var_name,
'type': 'continuous',
'domain': space[var_name].search_space_range,
}
if space[var_name].type in ('categorical', 'integer'):
var_dic['type'] = 'discrete'
opt_space.append(var_dic.copy())
self.space = Design_space(opt_space)
if self.ea is None:
self.problem = EAProblem(self.space.config_space, self.model.predict)
self.ea = CMAES(pop_size=self.pop_size)
self.ea.setup(self.problem, verbose=False)
else:
self.problem = EAProblem(self.space.config_space, self.model.predict)
def optimize(self, duplicate_manager=None):
pop = self.ea.ask()
self.ea.evaluator.eval(self.problem, pop)
pop_X = np.array([p.X for p in pop])
pop_F = np.array([p.F for p in pop])
top_k_idx = sorted(range(len(pop_F)), key=lambda i: pop_F[i])[:self.k]
elites = pop_X[top_k_idx]
elites_F = pop_F[top_k_idx]
return elites, elites_F
def _compute_acq(self, x):
raise NotImplementedError()
def _compute_acq_withGradients(self, x):
raise NotImplementedError()
class EAProblem(Problem):
def __init__(self, space, predict):
input_dim = len(space)
xl = []
xu = []
for var_info in space:
var_domain = var_info['domain']
xl.append(var_domain[0])
xu.append(var_domain[1])
xl = np.array(xl)
xu = np.array(xu)
self.predict = predict
super().__init__(n_var=input_dim, n_obj=1, xl=xl, xu=xu)
def _evaluate(self, x, out, *args, **kwargs):
out["F"], _ = self.predict(x)
================================================
FILE: transopt/optimizer/acquisition_function/model_manage/CMAESGeneration.py
================================================
import math
import numpy as np
from pymoo.core.problem import Problem
from GPyOpt import Design_space
from pymoo.algorithms.soo.nonconvex.cmaes import CMAES
from transopt.agent.registry import acf_registry
from transopt.optimizer.acquisition_function.acf_base import AcquisitionBase
@acf_registry.register('CMAES-Generation')
class CMAESGeneration(AcquisitionBase):
analytical_gradient_prediction = False
def __init__(self, config):
super(CMAESGeneration, self).__init__()
config_dict = {}
if config != "":
if ',' in config:
key_value_pairs = config.split(',')
else:
key_value_pairs = [config]
for pair in key_value_pairs:
key, value = pair.split(':')
config_dict[key.strip()] = value.strip()
if 'k' in config_dict:
self.k = int(config_dict['k'])
else:
self.k = 1
if 'n' in config_dict:
self.pop_size = 4 + math.floor(3 * math.log(int(config_dict['n'])))
else:
self.pop_size = 10
self.model = None
self.ea = None
self.problem = None
def link_space(self, space):
opt_space = []
for var_name in space.variables_order:
var_dic = {
'name': var_name,
'type': 'continuous',
'domain': space[var_name].search_space_range,
}
if space[var_name].type in ('categorical', 'integer'):
var_dic['type'] = 'discrete'
opt_space.append(var_dic.copy())
self.space = Design_space(opt_space)
if self.ea is None:
self.problem = EAProblem(self.space.config_space, self.model.predict)
self.ea = CMAES(pop_size=self.pop_size)
self.ea.setup(self.problem, verbose=False)
else:
self.problem = EAProblem(self.space.config_space, self.model.predict)
def optimize(self, duplicate_manager=None):
# Advance the EA for k generations, then return the final population
for i in range(self.k):
pop = self.ea.ask()
self.ea.evaluator.eval(self.problem, pop)
elites = np.array([p.X for p in pop])
elites_F = np.array([p.F for p in pop])
return elites, elites_F
def _compute_acq(self, x):
raise NotImplementedError()
def _compute_acq_withGradients(self, x):
raise NotImplementedError()
class EAProblem(Problem):
def __init__(self, space, predict):
input_dim = len(space)
xl = []
xu = []
for var_info in space:
var_domain = var_info['domain']
xl.append(var_domain[0])
xu.append(var_domain[1])
xl = np.array(xl)
xu = np.array(xu)
self.predict = predict
super().__init__(n_var=input_dim, n_obj=1, xl=xl, xu=xu)
def _evaluate(self, x, out, *args, **kwargs):
out["F"], _ = self.predict(x)
================================================
FILE: transopt/optimizer/acquisition_function/model_manage/CMAESPreSelect.py
================================================
import math
import numpy as np
from pymoo.core.problem import Problem
from GPyOpt import Design_space
from pymoo.algorithms.soo.nonconvex.cmaes import CMAES
from transopt.agent.registry import acf_registry
from transopt.optimizer.acquisition_function.acf_base import AcquisitionBase
@acf_registry.register('CMAES-PreSelect')
class CMAESPreSelect(AcquisitionBase):
analytical_gradient_prediction = False
def __init__(self, config):
super(CMAESPreSelect, self).__init__()
config_dict = {}
if config != "":
if ',' in config:
key_value_pairs = config.split(',')
else:
key_value_pairs = [config]
for pair in key_value_pairs:
key, value = pair.split(':')
config_dict[key.strip()] = value.strip()
if 'k' in config_dict:
self.k = int(config_dict['k'])
else:
self.k = 2
if 'n' in config_dict:
self.pop_size = 4 + math.floor(3 * math.log(int(config_dict['n'])))
else:
self.pop_size = 10
self.model = None
self.ea = None
self.problem = None
def link_space(self, space):
opt_space = []
for var_name in space.variables_order:
var_dic = {
'name': var_name,
'type': 'continuous',
'domain': space[var_name].search_space_range,
}
if space[var_name].type in ('categorical', 'integer'):
var_dic['type'] = 'discrete'
opt_space.append(var_dic.copy())
self.space = Design_space(opt_space)
if self.ea is None:
self.problem = EAProblem(self.space.config_space, self.model.predict)
self.ea = CMAES(pop_size=self.pop_size)
self.ea.setup(self.problem, verbose=False)
else:
self.problem = EAProblem(self.space.config_space, self.model.predict)
def optimize(self, duplicate_manager=None):
pop = self.ea.ask()
self.ea.evaluator.eval(self.problem, pop)
pop_X = np.array([p.X for p in pop])
pop_F = np.array([p.F for p in pop])
total_pop_X = pop_X
total_pop_F = pop_F
for i in range(self.k - 1):
pop = self.ea.ask()
self.ea.evaluator.eval(self.problem, pop)
pop_X = np.array([p.X for p in pop])
pop_F = np.array([p.F for p in pop])
total_pop_X = np.concatenate((total_pop_X, pop_X))
total_pop_F = np.concatenate((total_pop_F, pop_F))
top_k_idx = sorted(range(len(total_pop_F)), key=lambda i: total_pop_F[i])[:self.pop_size]
elites = total_pop_X[top_k_idx]
elites_F = total_pop_F[top_k_idx]
return elites, elites_F
def _compute_acq(self, x):
raise NotImplementedError()
def _compute_acq_withGradients(self, x):
raise NotImplementedError()
class EAProblem(Problem):
def __init__(self, space, predict):
input_dim = len(space)
xl = []
xu = []
for var_info in space:
var_domain = var_info['domain']
xl.append(var_domain[0])
xu.append(var_domain[1])
xl = np.array(xl)
xu = np.array(xu)
self.predict = predict
super().__init__(n_var=input_dim, n_obj=1, xl=xl, xu=xu)
def _evaluate(self, x, out, *args, **kwargs):
out["F"], _ = self.predict(x)
================================================
FILE: transopt/optimizer/acquisition_function/model_manage/DEBest.py
================================================
import math
import numpy as np
from pymoo.core.problem import Problem
from GPyOpt import Design_space
from pymoo.algorithms.soo.nonconvex.de import DE
from transopt.agent.registry import acf_registry
from transopt.optimizer.acquisition_function.acf_base import AcquisitionBase
@acf_registry.register('DE-Best')
class DEBest(AcquisitionBase):
analytical_gradient_prediction = False
def __init__(self, config):
super(DEBest, self).__init__()
config_dict = {}
if config != "":
if ',' in config:
key_value_pairs = config.split(',')
else:
key_value_pairs = [config]
for pair in key_value_pairs:
key, value = pair.split(':')
config_dict[key.strip()] = value.strip()
if 'k' in config_dict:
self.k = int(config_dict['k'])
else:
self.k = 2
if 'n' in config_dict:
self.pop_size = 4 + math.floor(3 * math.log(int(config_dict['n'])))
else:
self.pop_size = 10
self.model = None
self.ea = None
self.problem = None
def link_space(self, space):
opt_space = []
for var_name in space.variables_order:
var_dic = {
'name': var_name,
'type': 'continuous',
'domain': space[var_name].search_space_range,
}
if space[var_name].type in ('categorical', 'integer'):
var_dic['type'] = 'discrete'
opt_space.append(var_dic.copy())
self.space = Design_space(opt_space)
if self.ea is None:
self.problem = EAProblem(self.space.config_space, self.model.predict)
self.ea = DE(self.pop_size)
self.ea.setup(self.problem, verbose=False)
else:
self.problem = EAProblem(self.space.config_space, self.model.predict)
def optimize(self, duplicate_manager=None):
pop = self.ea.ask()
self.ea.evaluator.eval(self.problem, pop)
pop_X = np.array([p.X for p in pop])
pop_F = np.array([p.F for p in pop])
top_k_idx = sorted(range(len(pop_F)), key=lambda i: pop_F[i])[:self.k]
elites = pop_X[top_k_idx]
elites_F = pop_F[top_k_idx]
return elites, elites_F
def _compute_acq(self, x):
raise NotImplementedError()
def _compute_acq_withGradients(self, x):
raise NotImplementedError()
class EAProblem(Problem):
def __init__(self, space, predict):
input_dim = len(space)
xl = []
xu = []
for var_info in space:
var_domain = var_info['domain']
xl.append(var_domain[0])
xu.append(var_domain[1])
xl = np.array(xl)
xu = np.array(xu)
self.predict = predict
super().__init__(n_var=input_dim, n_obj=1, xl=xl, xu=xu)
def _evaluate(self, x, out, *args, **kwargs):
out["F"], _ = self.predict(x)
================================================
FILE: transopt/optimizer/acquisition_function/model_manage/DEGeneration.py
================================================
import math
import numpy as np
from pymoo.core.problem import Problem
from GPyOpt import Design_space
from pymoo.algorithms.soo.nonconvex.de import DE
from transopt.agent.registry import acf_registry
from transopt.optimizer.acquisition_function.acf_base import AcquisitionBase
@acf_registry.register('DE-Generation')
class DEGeneration(AcquisitionBase):
analytical_gradient_prediction = False
def __init__(self, config):
super(DEGeneration, self).__init__()
config_dict = {}
if config != "":
if ',' in config:
key_value_pairs = config.split(',')
else:
key_value_pairs = [config]
for pair in key_value_pairs:
key, value = pair.split(':')
config_dict[key.strip()] = value.strip()
if 'k' in config_dict:
self.k = int(config_dict['k'])
else:
self.k = 1
if 'n' in config_dict:
self.pop_size = 4 + math.floor(3 * math.log(int(config_dict['n'])))
else:
self.pop_size = 10
self.model = None
self.ea = None
self.problem = None
def link_space(self, space):
opt_space = []
for var_name in space.variables_order:
var_dic = {
'name': var_name,
'type': 'continuous',
'domain': space[var_name].search_space_range,
}
if space[var_name].type in ('categorical', 'integer'):
var_dic['type'] = 'discrete'
opt_space.append(var_dic.copy())
self.space = Design_space(opt_space)
if self.ea is None:
self.problem = EAProblem(self.space.config_space, self.model.predict)
self.ea = DE(self.pop_size)
self.ea.setup(self.problem, verbose=False)
else:
self.problem = EAProblem(self.space.config_space, self.model.predict)
def optimize(self, duplicate_manager=None):
# Advance the EA for k generations, then return the final population
for i in range(self.k):
pop = self.ea.ask()
self.ea.evaluator.eval(self.problem, pop)
elites = np.array([p.X for p in pop])
elites_F = np.array([p.F for p in pop])
return elites, elites_F
def _compute_acq(self, x):
raise NotImplementedError()
def _compute_acq_withGradients(self, x):
raise NotImplementedError()
class EAProblem(Problem):
def __init__(self, space, predict):
input_dim = len(space)
xl = []
xu = []
for var_info in space:
var_domain = var_info['domain']
xl.append(var_domain[0])
xu.append(var_domain[1])
xl = np.array(xl)
xu = np.array(xu)
self.predict = predict
super().__init__(n_var=input_dim, n_obj=1, xl=xl, xu=xu)
def _evaluate(self, x, out, *args, **kwargs):
out["F"], _ = self.predict(x)
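The `config` string these acquisition classes accept is a comma-separated list of `key:value` pairs, where `n` feeds a CMA-ES-style population-size heuristic. A minimal sketch of that shared parsing convention (the helper name `parse_acf_config` is mine, not part of TransOPT):

```python
import math

def parse_acf_config(config: str) -> dict:
    # "k:2,n:16" -> {"k": "2", "n": "16"}; an empty string yields {}
    config_dict = {}
    if config != "":
        pairs = config.split(',') if ',' in config else [config]
        for pair in pairs:
            key, value = pair.split(':')
            config_dict[key.strip()] = value.strip()
    return config_dict

cfg = parse_acf_config("k:2, n:16")
k = int(cfg.get('k', 1))
# population-size heuristic used by the classes above: 4 + floor(3 * ln(n))
pop_size = 4 + math.floor(3 * math.log(int(cfg['n']))) if 'n' in cfg else 10
```

With `n:16` this gives a population of 12, since `4 + floor(3 * ln 16) = 4 + 8`.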
================================================
FILE: transopt/optimizer/acquisition_function/model_manage/DEPreSelect.py
================================================
import math
import numpy as np
from pymoo.core.problem import Problem
from GPyOpt import Design_space
from pymoo.algorithms.soo.nonconvex.de import DE
from transopt.agent.registry import acf_registry
from transopt.optimizer.acquisition_function.acf_base import AcquisitionBase
@acf_registry.register('DE-PreSelect')
class DEPreSelect(AcquisitionBase):
analytical_gradient_prediction = False
def __init__(self, config):
super(DEPreSelect, self).__init__()
config_dict = {}
if config != "":
if ',' in config:
key_value_pairs = config.split(',')
else:
key_value_pairs = [config]
for pair in key_value_pairs:
key, value = pair.split(':')
config_dict[key.strip()] = value.strip()
if 'k' in config_dict:
self.k = int(config_dict['k'])
else:
self.k = 2
if 'n' in config_dict:
self.pop_size = 4 + math.floor(3 * math.log(int(config_dict['n'])))
else:
self.pop_size = 10
self.model = None
self.ea = None
self.problem = None
def link_space(self, space):
opt_space = []
for var_name in space.variables_order:
var_dic = {
'name': var_name,
'type': 'continuous',
'domain': space[var_name].search_space_range,
}
if space[var_name].type in ('categorical', 'integer'):
var_dic['type'] = 'discrete'
opt_space.append(var_dic.copy())
self.space = Design_space(opt_space)
if self.ea is None:
self.problem = EAProblem(self.space.config_space, self.model.predict)
self.ea = DE(self.pop_size)
self.ea.setup(self.problem, verbose=False)
else:
self.problem = EAProblem(self.space.config_space, self.model.predict)
def optimize(self, duplicate_manager=None):
pop = self.ea.ask()
self.ea.evaluator.eval(self.problem, pop)
pop_X = np.array([p.X for p in pop])
pop_F = np.array([p.F for p in pop])
total_pop_X = pop_X
total_pop_F = pop_F
for i in range(self.k - 1):
pop = self.ea.ask()
self.ea.evaluator.eval(self.problem, pop)
pop_X = np.array([p.X for p in pop])
pop_F = np.array([p.F for p in pop])
total_pop_X = np.concatenate((total_pop_X, pop_X))
total_pop_F = np.concatenate((total_pop_F, pop_F))
top_k_idx = sorted(range(len(total_pop_F)), key=lambda i: total_pop_F[i])[:self.pop_size]
elites = total_pop_X[top_k_idx]
elites_F = total_pop_F[top_k_idx]
return elites, elites_F
def _compute_acq(self, x):
raise NotImplementedError()
def _compute_acq_withGradients(self, x):
raise NotImplementedError()
class EAProblem(Problem):
def __init__(self, space, predict):
input_dim = len(space)
xl = []
xu = []
for var_info in space:
var_domain = var_info['domain']
xl.append(var_domain[0])
xu.append(var_domain[1])
xl = np.array(xl)
xu = np.array(xu)
self.predict = predict
super().__init__(n_var=input_dim, n_obj=1, xl=xl, xu=xu)
def _evaluate(self, x, out, *args, **kwargs):
out["F"], _ = self.predict(x)
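The `*-Best`, `*-PreSelect`, and `*-Generation` variants differ only in how many EA generations they run and how many candidates they keep; the shared elite-selection step is equivalent to an `argsort` over the surrogate predictions. A toy illustration (the quadratic stand-in for `model.predict` is hypothetical):

```python
import numpy as np

rng = np.random.default_rng(0)
pop_X = rng.uniform(-1.0, 1.0, size=(10, 2))      # one generation of EA candidates
pop_F = (pop_X ** 2).sum(axis=1, keepdims=True)   # surrogate mean in place of model.predict

k = 3
# equivalent to: sorted(range(len(pop_F)), key=lambda i: pop_F[i])[:k]
top_k_idx = np.argsort(pop_F.ravel())[:k]
elites, elites_F = pop_X[top_k_idx], pop_F[top_k_idx]
```

PreSelect applies this to the concatenation of several generations, while Generation skips the filtering and returns the last generation whole.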
================================================
FILE: transopt/optimizer/acquisition_function/model_manage/GABest.py
================================================
import math
import numpy as np
from pymoo.core.problem import Problem
from GPyOpt import Design_space
from pymoo.algorithms.soo.nonconvex.ga import GA
from transopt.agent.registry import acf_registry
from transopt.optimizer.acquisition_function.acf_base import AcquisitionBase
@acf_registry.register('GA-Best')
class GABest(AcquisitionBase):
analytical_gradient_prediction = False
def __init__(self, config):
super(GABest, self).__init__()
config_dict = {}
if config != "":
if ',' in config:
key_value_pairs = config.split(',')
else:
key_value_pairs = [config]
for pair in key_value_pairs:
key, value = pair.split(':')
config_dict[key.strip()] = value.strip()
if 'k' in config_dict:
self.k = int(config_dict['k'])
else:
self.k = 2
if 'n' in config_dict:
self.pop_size = 4 + math.floor(3 * math.log(int(config_dict['n'])))
else:
self.pop_size = 10
self.model = None
self.ea = None
self.problem = None
def link_space(self, space):
opt_space = []
for var_name in space.variables_order:
var_dic = {
'name': var_name,
'type': 'continuous',
'domain': space[var_name].search_space_range,
}
if space[var_name].type in ('categorical', 'integer'):
var_dic['type'] = 'discrete'
opt_space.append(var_dic.copy())
self.space = Design_space(opt_space)
if self.ea is None:
self.problem = EAProblem(self.space.config_space, self.model.predict)
self.ea = GA(self.pop_size)
self.ea.setup(self.problem, verbose=False)
else:
self.problem = EAProblem(self.space.config_space, self.model.predict)
def optimize(self, duplicate_manager=None):
pop = self.ea.ask()
self.ea.evaluator.eval(self.problem, pop)
pop_X = np.array([p.X for p in pop])
pop_F = np.array([p.F for p in pop])
top_k_idx = sorted(range(len(pop_F)), key=lambda i: pop_F[i])[:self.k]
elites = pop_X[top_k_idx]
elites_F = pop_F[top_k_idx]
return elites, elites_F
def _compute_acq(self, x):
raise NotImplementedError()
def _compute_acq_withGradients(self, x):
raise NotImplementedError()
class EAProblem(Problem):
def __init__(self, space, predict):
input_dim = len(space)
xl = []
xu = []
for var_info in space:
var_domain = var_info['domain']
xl.append(var_domain[0])
xu.append(var_domain[1])
xl = np.array(xl)
xu = np.array(xu)
self.predict = predict
super().__init__(n_var=input_dim, n_obj=1, xl=xl, xu=xu)
def _evaluate(self, x, out, *args, **kwargs):
out["F"], _ = self.predict(x)
================================================
FILE: transopt/optimizer/acquisition_function/model_manage/GAGeneration.py
================================================
import math
import numpy as np
from pymoo.core.problem import Problem
from GPyOpt import Design_space
from pymoo.algorithms.soo.nonconvex.ga import GA
from transopt.agent.registry import acf_registry
from transopt.optimizer.acquisition_function.acf_base import AcquisitionBase
@acf_registry.register('GA-Generation')
class GAGeneration(AcquisitionBase):
analytical_gradient_prediction = False
def __init__(self, config):
super(GAGeneration, self).__init__()
config_dict = {}
if config != "":
if ',' in config:
key_value_pairs = config.split(',')
else:
key_value_pairs = [config]
for pair in key_value_pairs:
key, value = pair.split(':')
config_dict[key.strip()] = value.strip()
if 'k' in config_dict:
self.k = int(config_dict['k'])
else:
self.k = 1
if 'n' in config_dict:
self.pop_size = 4 + math.floor(3 * math.log(int(config_dict['n'])))
else:
self.pop_size = 10
self.model = None
self.ea = None
self.problem = None
def link_space(self, space):
opt_space = []
for var_name in space.variables_order:
var_dic = {
'name': var_name,
'type': 'continuous',
'domain': space[var_name].search_space_range,
}
if space[var_name].type in ('categorical', 'integer'):
var_dic['type'] = 'discrete'
opt_space.append(var_dic.copy())
self.space = Design_space(opt_space)
if self.ea is None:
self.problem = EAProblem(self.space.config_space, self.model.predict)
self.ea = GA(self.pop_size)
self.ea.setup(self.problem, verbose=False)
else:
self.problem = EAProblem(self.space.config_space, self.model.predict)
def optimize(self, duplicate_manager=None):
for i in range(self.k):
pop = self.ea.ask()
self.ea.evaluator.eval(self.problem, pop)
pop_X = np.array([p.X for p in pop])
pop_F = np.array([p.F for p in pop])
# the whole final generation is returned; no top-k filtering here
elites = pop_X
elites_F = pop_F
return elites, elites_F
def _compute_acq(self, x):
raise NotImplementedError()
def _compute_acq_withGradients(self, x):
raise NotImplementedError()
class EAProblem(Problem):
def __init__(self, space, predict):
input_dim = len(space)
xl = []
xu = []
for var_info in space:
var_domain = var_info['domain']
xl.append(var_domain[0])
xu.append(var_domain[1])
xl = np.array(xl)
xu = np.array(xu)
self.predict = predict
super().__init__(n_var=input_dim, n_obj=1, xl=xl, xu=xu)
def _evaluate(self, x, out, *args, **kwargs):
out["F"], _ = self.predict(x)
================================================
FILE: transopt/optimizer/acquisition_function/model_manage/GAPreSelect.py
================================================
import math
import numpy as np
from pymoo.core.problem import Problem
from GPyOpt import Design_space
from pymoo.algorithms.soo.nonconvex.ga import GA
from transopt.agent.registry import acf_registry
from transopt.optimizer.acquisition_function.acf_base import AcquisitionBase
@acf_registry.register('GA-PreSelect')
class GAPreSelect(AcquisitionBase):
analytical_gradient_prediction = False
def __init__(self, config):
super(GAPreSelect, self).__init__()
config_dict = {}
if config != "":
if ',' in config:
key_value_pairs = config.split(',')
else:
key_value_pairs = [config]
for pair in key_value_pairs:
key, value = pair.split(':')
config_dict[key.strip()] = value.strip()
if 'k' in config_dict:
self.k = int(config_dict['k'])
else:
self.k = 2
if 'n' in config_dict:
self.pop_size = 4 + math.floor(3 * math.log(int(config_dict['n'])))
else:
self.pop_size = 10
self.model = None
self.ea = None
self.problem = None
def link_space(self, space):
opt_space = []
for var_name in space.variables_order:
var_dic = {
'name': var_name,
'type': 'continuous',
'domain': space[var_name].search_space_range,
}
if space[var_name].type in ('categorical', 'integer'):
var_dic['type'] = 'discrete'
opt_space.append(var_dic.copy())
self.space = Design_space(opt_space)
if self.ea is None:
self.problem = EAProblem(self.space.config_space, self.model.predict)
self.ea = GA(self.pop_size)
self.ea.setup(self.problem, verbose=False)
else:
self.problem = EAProblem(self.space.config_space, self.model.predict)
def optimize(self, duplicate_manager=None):
pop = self.ea.ask()
self.ea.evaluator.eval(self.problem, pop)
pop_X = np.array([p.X for p in pop])
pop_F = np.array([p.F for p in pop])
total_pop_X = pop_X
total_pop_F = pop_F
for i in range(self.k - 1):
pop = self.ea.ask()
self.ea.evaluator.eval(self.problem, pop)
pop_X = np.array([p.X for p in pop])
pop_F = np.array([p.F for p in pop])
total_pop_X = np.concatenate((total_pop_X, pop_X))
total_pop_F = np.concatenate((total_pop_F, pop_F))
top_k_idx = sorted(range(len(total_pop_F)), key=lambda i: total_pop_F[i])[:self.pop_size]
elites = total_pop_X[top_k_idx]
elites_F = total_pop_F[top_k_idx]
return elites, elites_F
def _compute_acq(self, x):
raise NotImplementedError()
def _compute_acq_withGradients(self, x):
raise NotImplementedError()
class EAProblem(Problem):
def __init__(self, space, predict):
input_dim = len(space)
xl = []
xu = []
for var_info in space:
var_domain = var_info['domain']
xl.append(var_domain[0])
xu.append(var_domain[1])
xl = np.array(xl)
xu = np.array(xu)
self.predict = predict
super().__init__(n_var=input_dim, n_obj=1, xl=xl, xu=xu)
def _evaluate(self, x, out, *args, **kwargs):
out["F"], _ = self.predict(x)
================================================
FILE: transopt/optimizer/acquisition_function/model_manage/PSOBest.py
================================================
import math
import numpy as np
from pymoo.core.problem import Problem
from GPyOpt import Design_space
from pymoo.algorithms.soo.nonconvex.pso import PSO
from transopt.agent.registry import acf_registry
from transopt.optimizer.acquisition_function.acf_base import AcquisitionBase
@acf_registry.register('PSO-Best')
class PSOBest(AcquisitionBase):
analytical_gradient_prediction = False
def __init__(self, config):
super(PSOBest, self).__init__()
config_dict = {}
if config != "":
if ',' in config:
key_value_pairs = config.split(',')
else:
key_value_pairs = [config]
for pair in key_value_pairs:
key, value = pair.split(':')
config_dict[key.strip()] = value.strip()
if 'k' in config_dict:
self.k = int(config_dict['k'])
else:
self.k = 2
if 'n' in config_dict:
self.pop_size = 4 + math.floor(3 * math.log(int(config_dict['n'])))
else:
self.pop_size = 10
self.model = None
self.ea = None
self.problem = None
def link_space(self, space):
opt_space = []
for var_name in space.variables_order:
var_dic = {
'name': var_name,
'type': 'continuous',
'domain': space[var_name].search_space_range,
}
if space[var_name].type in ('categorical', 'integer'):
var_dic['type'] = 'discrete'
opt_space.append(var_dic.copy())
self.space = Design_space(opt_space)
if self.ea is None:
self.problem = EAProblem(self.space.config_space, self.model.predict)
self.ea = PSO(self.pop_size)
self.ea.setup(self.problem, verbose=False)
else:
self.problem = EAProblem(self.space.config_space, self.model.predict)
def optimize(self, duplicate_manager=None):
pop = self.ea.ask()
self.ea.evaluator.eval(self.problem, pop)
pop_X = np.array([p.X for p in pop])
pop_F = np.array([p.F for p in pop])
top_k_idx = sorted(range(len(pop_F)), key=lambda i: pop_F[i])[:self.k]
elites = pop_X[top_k_idx]
elites_F = pop_F[top_k_idx]
return elites, elites_F
def _compute_acq(self, x):
raise NotImplementedError()
def _compute_acq_withGradients(self, x):
raise NotImplementedError()
class EAProblem(Problem):
def __init__(self, space, predict):
input_dim = len(space)
xl = []
xu = []
for var_info in space:
var_domain = var_info['domain']
xl.append(var_domain[0])
xu.append(var_domain[1])
xl = np.array(xl)
xu = np.array(xu)
self.predict = predict
super().__init__(n_var=input_dim, n_obj=1, xl=xl, xu=xu)
def _evaluate(self, x, out, *args, **kwargs):
out["F"], _ = self.predict(x)
================================================
FILE: transopt/optimizer/acquisition_function/model_manage/PSOGeneration.py
================================================
import math
import numpy as np
from pymoo.core.problem import Problem
from GPyOpt import Design_space
from pymoo.algorithms.soo.nonconvex.pso import PSO
from transopt.agent.registry import acf_registry
from transopt.optimizer.acquisition_function.acf_base import AcquisitionBase
@acf_registry.register('PSO-Generation')
class PSOGeneration(AcquisitionBase):
analytical_gradient_prediction = False
def __init__(self, config):
super(PSOGeneration, self).__init__()
config_dict = {}
if config != "":
if ',' in config:
key_value_pairs = config.split(',')
else:
key_value_pairs = [config]
for pair in key_value_pairs:
key, value = pair.split(':')
config_dict[key.strip()] = value.strip()
if 'k' in config_dict:
self.k = int(config_dict['k'])
else:
self.k = 1
if 'n' in config_dict:
self.pop_size = 4 + math.floor(3 * math.log(int(config_dict['n'])))
else:
self.pop_size = 10
self.model = None
self.ea = None
self.problem = None
def link_space(self, space):
opt_space = []
for var_name in space.variables_order:
var_dic = {
'name': var_name,
'type': 'continuous',
'domain': space[var_name].search_space_range,
}
if space[var_name].type in ('categorical', 'integer'):
var_dic['type'] = 'discrete'
opt_space.append(var_dic.copy())
self.space = Design_space(opt_space)
if self.ea is None:
self.problem = EAProblem(self.space.config_space, self.model.predict)
self.ea = PSO(self.pop_size)
self.ea.setup(self.problem, verbose=False)
else:
self.problem = EAProblem(self.space.config_space, self.model.predict)
def optimize(self, duplicate_manager=None):
for i in range(self.k):
pop = self.ea.ask()
self.ea.evaluator.eval(self.problem, pop)
pop_X = np.array([p.X for p in pop])
pop_F = np.array([p.F for p in pop])
# the whole final generation is returned; no top-k filtering here
elites = pop_X
elites_F = pop_F
return elites, elites_F
def _compute_acq(self, x):
raise NotImplementedError()
def _compute_acq_withGradients(self, x):
raise NotImplementedError()
class EAProblem(Problem):
def __init__(self, space, predict):
input_dim = len(space)
xl = []
xu = []
for var_info in space:
var_domain = var_info['domain']
xl.append(var_domain[0])
xu.append(var_domain[1])
xl = np.array(xl)
xu = np.array(xu)
self.predict = predict
super().__init__(n_var=input_dim, n_obj=1, xl=xl, xu=xu)
def _evaluate(self, x, out, *args, **kwargs):
out["F"], _ = self.predict(x)
================================================
FILE: transopt/optimizer/acquisition_function/model_manage/PSOPreSelect.py
================================================
import math
import numpy as np
from pymoo.core.problem import Problem
from GPyOpt import Design_space
from pymoo.algorithms.soo.nonconvex.pso import PSO
from transopt.agent.registry import acf_registry
from transopt.optimizer.acquisition_function.acf_base import AcquisitionBase
@acf_registry.register('PSO-PreSelect')
class PSOPreSelect(AcquisitionBase):
analytical_gradient_prediction = False
def __init__(self, config):
super(PSOPreSelect, self).__init__()
config_dict = {}
if config != "":
if ',' in config:
key_value_pairs = config.split(',')
else:
key_value_pairs = [config]
for pair in key_value_pairs:
key, value = pair.split(':')
config_dict[key.strip()] = value.strip()
if 'k' in config_dict:
self.k = int(config_dict['k'])
else:
self.k = 2
if 'n' in config_dict:
self.pop_size = 4 + math.floor(3 * math.log(int(config_dict['n'])))
else:
self.pop_size = 10
self.model = None
self.ea = None
self.problem = None
def link_space(self, space):
opt_space = []
for var_name in space.variables_order:
var_dic = {
'name': var_name,
'type': 'continuous',
'domain': space[var_name].search_space_range,
}
if space[var_name].type in ('categorical', 'integer'):
var_dic['type'] = 'discrete'
opt_space.append(var_dic.copy())
self.space = Design_space(opt_space)
if self.ea is None:
self.problem = EAProblem(self.space.config_space, self.model.predict)
self.ea = PSO(self.pop_size)
self.ea.setup(self.problem, verbose=False)
else:
self.problem = EAProblem(self.space.config_space, self.model.predict)
def optimize(self, duplicate_manager=None):
pop = self.ea.ask()
self.ea.evaluator.eval(self.problem, pop)
pop_X = np.array([p.X for p in pop])
pop_F = np.array([p.F for p in pop])
total_pop_X = pop_X
total_pop_F = pop_F
for i in range(self.k - 1):
pop = self.ea.ask()
self.ea.evaluator.eval(self.problem, pop)
pop_X = np.array([p.X for p in pop])
pop_F = np.array([p.F for p in pop])
total_pop_X = np.concatenate((total_pop_X, pop_X))
total_pop_F = np.concatenate((total_pop_F, pop_F))
top_k_idx = sorted(range(len(total_pop_F)), key=lambda i: total_pop_F[i])[:self.pop_size]
elites = total_pop_X[top_k_idx]
elites_F = total_pop_F[top_k_idx]
return elites, elites_F
def _compute_acq(self, x):
raise NotImplementedError()
def _compute_acq_withGradients(self, x):
raise NotImplementedError()
class EAProblem(Problem):
def __init__(self, space, predict):
input_dim = len(space)
xl = []
xu = []
for var_info in space:
var_domain = var_info['domain']
xl.append(var_domain[0])
xu.append(var_domain[1])
xl = np.array(xl)
xu = np.array(xu)
self.predict = predict
super().__init__(n_var=input_dim, n_obj=1, xl=xl, xu=xu)
def _evaluate(self, x, out, *args, **kwargs):
out["F"], _ = self.predict(x)
================================================
FILE: transopt/optimizer/acquisition_function/moeadego.py
================================================
import numpy as np
from GPyOpt.util.general import get_quantiles
from GPyOpt.optimization.acquisition_optimizer import AcquisitionOptimizer
from transopt.agent.registry import acf_registry
@acf_registry.register("MOEADEGO")
class MOEADEGO:
def __init__(self, model, space, optimizer, config):
self.optimizer = optimizer
self.model = model
self.model_id = 0
if 'jitter' in config:
self.jitter = config['jitter']
else:
self.jitter = 0.1
if 'threshold' in config:
self.threshold = config['threshold']
else:
self.threshold = 0
def _compute_acq(self, x):
m, s = self.model.predict_by_id(x, self.model_id)
fmin = self.model.get_fmin_by_id(self.model_id)
phi, Phi, u = get_quantiles(self.jitter, fmin, m, s)
f_acqu_ei = s * (u * Phi + phi)
return -f_acqu_ei
def set_model_id(self, idx):
self.model_id = idx
def optimize(self, duplicate_manager=None):
space = self.model.search_space
self.acquisition_optimizer = AcquisitionOptimizer(space, 'lbfgs') ## more arguments may come here
suggested_sample = []
suggested_acfvalue = []
for i in range(len(self.model.model_list)):
self.set_model_id(i)
suggest_x, acf_value = self.acquisition_optimizer.optimize(self._compute_acq)
suggested_sample.append(suggest_x)
suggested_acfvalue.append(acf_value)
suggested_sample = np.vstack(suggested_sample)
suggested_acfvalue = np.vstack(suggested_acfvalue)
return suggested_sample, suggested_acfvalue
================================================
FILE: transopt/optimizer/acquisition_function/pi.py
================================================
import copy
from GPyOpt.core.task.cost import constant_cost_withGradients
from GPyOpt.util.general import get_quantiles
from transopt.agent.registry import acf_registry
from transopt.optimizer.acquisition_function.acf_base import AcquisitionBase
@acf_registry.register('PI')
class AcquisitionPI(AcquisitionBase):
"""
General template for creating a new GPyOpt acquisition function
:param model: GPyOpt class of model
:param space: GPyOpt class of domain
:param optimizer: optimizer of the acquisition. Should be a GPyOpt optimizer
:param cost_withGradients: function that provides the evaluation cost and its gradients
"""
# --- Set this line to true if analytical gradients are available
analytical_gradient_prediction = False
def __init__(self, config):
super(AcquisitionPI, self).__init__()
if 'jitter' in config:
self.jitter = config['jitter']
else:
self.jitter = 0.01
if 'threshold' in config:
self.threshold = config['threshold']
else:
self.threshold = 0
self.cost_withGradients = constant_cost_withGradients
def _compute_acq(self, x):
m, s = self.model.predict(x)
fmin = self.model.get_fmin()
phi, Phi, u = get_quantiles(self.jitter, fmin, m, s)
f_acqu_pi = Phi
return f_acqu_pi
def _compute_acq_withGradients(self, x):
# --- DEFINE YOUR ACQUISITION (TO BE MAXIMIZED) AND ITS GRADIENT HERE
#
# Compute the value of the new acquisition function here. Remember that x is a 2D numpy array
# with a point of the domain in each row. f_acqu_x should be a column vector containing the
# values of the acquisition at x, and df_acqu_x should contain in each row the gradient of the
# acquisition at the corresponding point of x.
#
# NOTE: this function is optional. If not available, the gradients will be approximated numerically.
raise NotImplementedError()
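For reference, the PI value returned by `_compute_acq` reduces to a normal CDF under GPyOpt's `get_quantiles` convention, `u = (fmin - m - jitter) / s`. A standalone sketch (the posterior means and standard deviations below are made up for illustration):

```python
import numpy as np
from scipy.stats import norm

def probability_of_improvement(m, s, fmin, jitter=0.01):
    # mirrors GPyOpt.util.general.get_quantiles: u = (fmin - m - jitter) / s
    u = (fmin - m - jitter) / s
    return norm.cdf(u)

m = np.array([[0.5], [1.5]])   # posterior means at two candidate points
s = np.array([[0.2], [0.2]])   # posterior standard deviations
pi = probability_of_improvement(m, s, fmin=1.0)
# the candidate whose mean lies below the incumbent fmin has the higher PI
```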
================================================
FILE: transopt/optimizer/acquisition_function/piei.py
================================================
from GPyOpt.acquisitions.base import AcquisitionBase
from GPyOpt.core.task.cost import constant_cost_withGradients
from GPyOpt.util.general import get_quantiles
class AcquisitionpiEI(AcquisitionBase):
"""
General template for creating a new GPyOpt acquisition function
:param model: GPyOpt class of model
:param space: GPyOpt class of domain
:param optimizer: optimizer of the acquisition. Should be a GPyOpt optimizer
:param cost_withGradients: function that provides the evaluation cost and its gradients
"""
# --- Set this line to true if analytical gradients are available
analytical_gradient_prediction = False
def __init__(self, Model, space, optimizer, cost_withGradients=None, jitter=0.01, threshold=0.):
self.optimizer = optimizer
super(AcquisitionpiEI, self).__init__(Model, space, optimizer)
self.Model = Model
self.jitter = jitter
self.threshold = threshold
if cost_withGradients is None:
self.cost_withGradients = constant_cost_withGradients
else:
print('piEI acquisition does not make sense with a cost model at present. Cost set to constant.')
self.cost_withGradients = constant_cost_withGradients
def _compute_acq(self, x):
m, s = self.Model.predict(x)
# fmin = self.CBOModel.get_valid_fmin()
fmin = self.Model.get_fmin()
phi, Phi, u = get_quantiles(self.jitter, fmin, m, s)
f_acqu_ei = s * (u * Phi + phi)
return f_acqu_ei * self._compute_prior(x)
def _compute_prior(self, x):
return 1
def _compute_acq_withGradients(self, x):
# --- DEFINE YOUR ACQUISITION (TO BE MAXIMIZED) AND ITS GRADIENT HERE
#
# Compute the value of the new acquisition function here. Remember that x is a 2D numpy array
# with a point of the domain in each row. f_acqu_x should be a column vector containing the
# values of the acquisition at x, and df_acqu_x should contain in each row the gradient of the
# acquisition at the corresponding point of x.
#
# NOTE: this function is optional. If not available, the gradients will be approximated numerically.
raise NotImplementedError()
================================================
FILE: transopt/optimizer/acquisition_function/sequential.py
================================================
from GPyOpt.core.evaluators.base import EvaluatorBase
class Sequential(EvaluatorBase):
"""
Class for standard Sequential Bayesian optimization methods.
:param acquisition: acquisition function to be used to compute the batch.
:param batch_size: it is 1 by default since this class is only used for sequential methods.
"""
def __init__(self, acquisition, batch_size=1):
super(Sequential, self).__init__(acquisition, batch_size)
def compute_batch(self, duplicate_manager=None, context_manager=None):
"""
Selects the new location to evaluate the objective.
"""
x, acq_value = self.acquisition.optimize(duplicate_manager=duplicate_manager)
return x, acq_value
# class Sequential_Tabular(EvaluatorBase):
# """
# Class for standard Sequential Bayesian optimization methods.
#
# :param acquisition: acquisition function to be used to compute the batch.
# :param batch size: it is 1 by default since this class is only used for sequential methods.
# """
#
# def __init__(self, acquisition, batch_size=1):
# super(Sequential_Tabular, self).__init__(acquisition, batch_size)
#
# def compute_batch(self, X, unobserved_indexes):
# """
# Selects the new location to evaluate the objective.
# """
# acq_value = self.acquisition._compute_acq(X)
# min_index = np.argmin(acq_value)
# return unobserved_indexes[min_index], acq_value[min_index]
================================================
FILE: transopt/optimizer/acquisition_function/smsego.py
================================================
import numpy as np
import scipy.optimize as opt
from scipy.stats import norm
from transopt.agent.registry import acf_registry
from transopt.utils.hypervolume import calc_hypervolume
@acf_registry.register("SMSEGO")
class SMSEGO:
def __init__(self, model, space, optimizer, config):
self.optimizer = optimizer
self.model = model
self.const = 1 / norm.cdf(0.5 + 1 / 2**self.model.num_objective)
self.current_hypervolume = None
self.w_ref = None
def _compute_acq(self, x):
if self.w_ref is None:
self.w_ref = self.model._Y.max(axis=1) + 1.0e2
if self.current_hypervolume is None:
self.current_hypervolume = calc_hypervolume(self.model._Y.T, self.w_ref)
if np.any(np.all(self.model._X == x, axis=1)):
return 1.0e5
else:
mean, var = self.model.predict(x)
lcb = mean - self.const * np.sqrt(var)
new_y_train = np.hstack((self.model._Y, lcb.T)).T
new_hypervolume = calc_hypervolume(new_y_train, self.w_ref)
smsego = self.current_hypervolume - new_hypervolume
return smsego
def optimize(self, duplicate_manager=None):
x_bounds = self.model._get_var_bound("search")
default = np.array([(v[1] + v[0]) / 2 for _, v in x_bounds.items()])
bounds = [(v[0], v[1]) for _, v in x_bounds.items()]
result = opt.minimize(
self._compute_acq, x0=default, bounds=bounds, method="L-BFGS-B"
)
return result.x[np.newaxis, :], result.fun
================================================
FILE: transopt/optimizer/acquisition_function/taf.py
================================================
import copy
import numpy as np
from GPyOpt.core.task.cost import constant_cost_withGradients
from GPyOpt.util.general import get_quantiles
from transopt.agent.registry import acf_registry
from transopt.optimizer.acquisition_function.acf_base import AcquisitionBase
@acf_registry.register('TAF')
class AcquisitionTAF(AcquisitionBase):
"""
General template for creating a new GPyOpt acquisition function
:param model: GPyOpt class of model
:param space: GPyOpt class of domain
:param optimizer: optimizer of the acquisition. Should be a GPyOpt optimizer
:param cost_withGradients: function that provides the evaluation cost and its gradients
"""
# --- Set this line to true if analytical gradients are available
analytical_gradient_prediction = False
def __init__(self, config):
super(AcquisitionTAF, self).__init__()
if 'jitter' in config:
self.jitter = config['jitter']
else:
self.jitter = 0.01
if 'threshold' in config:
self.threshold = config['threshold']
else:
self.threshold = 0
self.cost_withGradients = constant_cost_withGradients
def _compute_acq(self, x):
n_sample = len(x)
source_num = len(self.model._source_gps)
n_models = source_num + 1
acf_ei = np.empty((n_models, n_sample, 1))
for task_uid in range(source_num):
m, s = self.model._source_gps[task_uid].predict(x)
_X = self.model._source_gps[task_uid]._X
fmin = self.model._source_gps[task_uid].predict(_X)[0].min()
phi, Phi, u = get_quantiles(self.jitter, fmin, m, s)
acf_ei[task_uid] = s * (u * Phi + phi)
m,s = self.model.predict(x)
for task_uid in range(source_num):
acf_ei[task_uid] = acf_ei[task_uid] * self.model._source_gp_weights[task_uid]
acf_ei[-1] = self.model._target_model_weight
fmin = self.model.get_fmin()
phi, Phi, u = get_quantiles(self.jitter, fmin, m, s)
acf_ei[-1] = acf_ei[-1] * (s * (u * Phi + phi))
f_acqu_ei = np.sum(acf_ei, axis=0)
return f_acqu_ei
def _compute_acq_withGradients(self, x):
# --- DEFINE YOUR ACQUISITION (TO BE MAXIMIZED) AND ITS GRADIENT HERE
#
# Compute the value of the new acquisition function here. Remember that x is a 2D numpy array
# with a point of the domain in each row. f_acqu_x should be a column vector containing the
# values of the acquisition at x, and df_acqu_x should contain in each row the gradient of the
# acquisition at the corresponding point of x.
#
# NOTE: this function is optional. If not available, the gradients will be approximated numerically.
raise NotImplementedError()
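Stripped of the GP bookkeeping, the reduction at the end of `_compute_acq` is a similarity-weighted sum of per-model EI values over the model axis. A toy sketch with made-up EI values and weights (two source models plus the target model, three candidate points):

```python
import numpy as np

def weighted_ei(ei_per_model, weights):
    """Transfer acquisition: weighted sum of per-model EI values.

    ei_per_model: (n_models, n_points, 1) EI of each source GP plus the target GP
    weights:      (n_models,) similarity weights for the models
    """
    ei = np.asarray(ei_per_model) * np.asarray(weights)[:, None, None]
    return ei.sum(axis=0)

ei = np.array([[[0.1], [0.3], [0.2]],
               [[0.2], [0.1], [0.4]],
               [[0.5], [0.2], [0.1]]])
w = np.array([0.25, 0.25, 0.5])
f = weighted_ei(ei, w)   # one combined acquisition value per candidate
```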
================================================
FILE: transopt/optimizer/construct_optimizer.py
================================================
from transopt.agent.registry import (acf_registry, sampler_registry,
selector_registry, space_refiner_registry,
model_registry, pretrain_registry, normalizer_registry)
from transopt.optimizer.optimizer_base.bo import BO
def ConstructOptimizer(optimizer_config: dict = None, seed: int = 0) -> BO:
"""Create the optimizer object from a configuration dictionary."""
# fill in defaults so missing parameter sections do not raise a KeyError below
optimizer_config.setdefault('SpaceRefinerParameters', {})
optimizer_config.setdefault('SamplerParameters', {})
optimizer_config.setdefault('ACFParameters', {})
optimizer_config.setdefault('ModelParameters', {})
optimizer_config.setdefault('PretrainParameters', {})
optimizer_config.setdefault('NormalizerParameters', {})
optimizer_config.setdefault('SamplerInitNum', 11)
if optimizer_config['SpaceRefiner'] == 'None':
SpaceRefiner = None
else:
SpaceRefiner = space_refiner_registry[optimizer_config['SpaceRefiner']](optimizer_config['SpaceRefinerParameters'])
Sampler = sampler_registry[optimizer_config['Sampler']](optimizer_config['SamplerInitNum'], optimizer_config['SamplerParameters'])
ACF = acf_registry[optimizer_config['ACF']](config = optimizer_config['ACFParameters'])
# Model = model_registry[optimizer_config['Model']](config = optimizer_config['ModelParameters'])
Model = model_registry[optimizer_config['Model']]()
if optimizer_config['Pretrain'] == 'None':
Pretrain = None
else:
Pretrain = pretrain_registry[optimizer_config['Pretrain']](optimizer_config['PretrainParameters'])
if optimizer_config['Normalizer'] == 'None':
Normalizer = None
else:
Normalizer = normalizer_registry[optimizer_config['Normalizer']](optimizer_config['NormalizerParameters'])
optimizer = BO(SpaceRefiner, Sampler, ACF, Pretrain, Model, Normalizer, optimizer_config)
return optimizer
def ConstructSelector(optimizer_config: dict = None, seed: int = 0):
DataSelectors = {}
for key in optimizer_config.keys():
if key.endswith('DataSelector'):
if optimizer_config[key] == 'None':
DataSelectors[key] = None
else:
DataSelectors[key] = selector_registry[optimizer_config[key]](optimizer_config[key + 'Parameters'])
return DataSelectors
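`ConstructOptimizer` and `ConstructSelector` above resolve component names from registries and treat the literal string `'None'` as a disabled component; data selectors are discovered by key suffix. A minimal self-contained sketch of that pattern, with toy registry dicts and a hypothetical `Sobol` component standing in for transopt's real registries:

```python
class Sobol:
    """Toy stand-in for a registered sampler component."""
    def __init__(self, init_num, params):
        self.init_num = init_num
        self.params = params

sampler_registry = {"Sobol": Sobol}
selector_registry = {"Recent": lambda params: ("Recent", params)}

def construct(config):
    # Resolve the sampler; the literal string 'None' disables a component.
    sampler = None
    if config["Sampler"] != "None":
        sampler = sampler_registry[config["Sampler"]](
            config["SamplerInitNum"], config.get("SamplerParameters", {}))
    # Collect every '*DataSelector' entry, pairing it with its '…Parameters' key.
    selectors = {}
    for key in config:
        if key.endswith("DataSelector"):
            if config[key] == "None":
                selectors[key] = None
            else:
                selectors[key] = selector_registry[config[key]](
                    config.get(key + "Parameters", {}))
    return sampler, selectors

sampler, selectors = construct({
    "Sampler": "Sobol", "SamplerInitNum": 11,
    "ModelDataSelector": "Recent", "ModelDataSelectorParameters": {"k": 5},
    "ACFDataSelector": "None",
})
```

The suffix-driven loop is what lets new `*DataSelector` entries be added to the config without touching the construction code.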
================================================
FILE: transopt/optimizer/model/HyperBO.py
================================================
import random
import time
from external.hyperbo.basics import definitions as defs
from external.hyperbo.basics import params_utils
from external.hyperbo.gp_utils import gp
from external.hyperbo.gp_utils import kernel
from external.hyperbo.gp_utils import mean
from external.hyperbo.gp_utils import utils
from external.hyperbo.bo_utils import data
from external.hyperbo.gp_utils import objectives as obj
import jax
import jax.numpy as jnp
import matplotlib
import matplotlib.pyplot as plt
from typing import Any, Callable, Dict, List, Tuple, Union
font = {
'family': 'serif',
'weight': 'normal',
'size': 7,
}
axes = {'titlesize': 7, 'labelsize': 7}
matplotlib.rc('font', **font)
matplotlib.rc('axes', **axes)
DEFAULT_WARP_FUNC = utils.DEFAULT_WARP_FUNC
GPParams = defs.GPParams
SubDataset = defs.SubDataset
class hyperbo():
def __init__(self, seed = 0):
self.mean_func = mean.constant
self.cov_func = kernel.squared_exponential
self.warp_func = DEFAULT_WARP_FUNC
self.key = jax.random.PRNGKey(seed)
self._X = None
self._Y = None
self.params = GPParams(
model={
'constant': 5.,
'lengthscale': 1.,
'signal_variance': 1.0,
'noise_variance': 0.01,
},
config={
'Method': 'adam',
'learning_rate': 1e-5,
'beta': 0.9,
'max_training_step': 1,
'batch_size': 100,
'retrain': 1,
})
def pretrain(self, Meta_data, Target_data):
dataset = {}
num_train_functions = len(Meta_data['X'])
for sub_dataset_id in range(num_train_functions):
x = jax.numpy.array(Meta_data['X'][sub_dataset_id])
y = jax.numpy.array(Meta_data['Y'][sub_dataset_id])
dataset[str(sub_dataset_id)] = SubDataset(x, y)
self.target_dataset_id = num_train_functions
self._X = Target_data['X']
self._Y = Target_data['Y']
x = jax.numpy.array(self._X)
y = jax.numpy.array(self._Y)
dataset[str(self.target_dataset_id)] = SubDataset(x, y)
self.model = gp.GP(
dataset=dataset,
params=self.params,
mean_func=self.mean_func,
cov_func=self.cov_func,
warp_func=self.warp_func,
)
assert self.key is not None, ('Cannot initialize with '
'init_random_key == None.')
key, subkey = jax.random.split(self.key)
self.model.initialize_params(subkey)
# Infer GP parameters.
key, subkey = jax.random.split(self.key)
self.model.train(subkey)
def retrain(self, Target_data):
self._X = Target_data['X']
self._Y = Target_data['Y']
x = jax.numpy.array(self._X)
y = jax.numpy.array(self._Y)
dataset = SubDataset(x, y)
self.model.update_sub_dataset(
dataset, sub_dataset_key=str(self.target_dataset_id), is_append=False)
retrain_condition = 'retrain' in self.model.params.config and self.model.params.config[
'retrain'] > 0 and self.model.dataset[str(self.target_dataset_id)].x.shape[0] > 0
if not retrain_condition:
return
if self.model.params.config['objective'] in [obj.regkl, obj.regeuc]:
raise ValueError('Objective must include NLL to retrain.')
max_training_step = self.model.params.config['retrain']
self.model.params.config['max_training_step'] = max_training_step
key, subkey = jax.random.split(self.key)
self.model.train(subkey)
def predict(self, X, subset_data_id:Union[int, str] = 0):
_X = jnp.array(X)
mu, var = self.model.predict(_X, subset_data_id)
return mu, var
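`hyperbo.pretrain` above expects `Meta_data` and `Target_data` as dicts of per-task arrays and stores them as string-keyed sub-datasets, with the target task keyed last. A numpy-only sketch of that keying scheme (no jax; `build_dataset` is a hypothetical name for illustration):

```python
import numpy as np

def build_dataset(meta_data, target_data):
    """Mirror the keying scheme in hyperbo.pretrain: source tasks get
    keys '0'..'n-1' and the target task gets key 'n'."""
    dataset = {}
    for i, (x, y) in enumerate(zip(meta_data["X"], meta_data["Y"])):
        dataset[str(i)] = (np.asarray(x), np.asarray(y))
    target_id = len(meta_data["X"])  # target is appended after all sources
    dataset[str(target_id)] = (np.asarray(target_data["X"]),
                               np.asarray(target_data["Y"]))
    return dataset, target_id

meta = {"X": [np.zeros((5, 2)), np.ones((3, 2))],
        "Y": [np.zeros((5, 1)), np.ones((3, 1))]}
target = {"X": np.ones((2, 2)), "Y": np.zeros((2, 1))}
dataset, target_id = build_dataset(meta, target)
```

In the real class each tuple would be a `SubDataset` of jax arrays, and `target_id` is what `retrain` later uses to swap in fresh target observations.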
================================================
FILE: transopt/optimizer/model/__init__.py
================================================
from transopt.optimizer.model.gp import GP
from transopt.optimizer.model.pr import PR
from transopt.optimizer.model.rf import RF
from transopt.optimizer.model.mtgp import MTGP
from transopt.optimizer.model.mhgp import MHGP
from transopt.optimizer.model.rgpe import RGPE
from transopt.optimizer.model.sgpt import SGPT
from transopt.optimizer.model.rbfn import RBFN
from transopt.optimizer.model.mlp import MLP
from transopt.optimizer.model.deepkernel import DeepKernelGP
from transopt.optimizer.model.neuralprocess import NeuralProcess
================================================
FILE: transopt/optimizer/model/bohb.py
================================================
import copy
import numpy as np
import scipy
import statsmodels.api as sm
import dask
# NOTE: get_sample() below references `cs.Type`; the configuration-space
# module it refers to (imported as `cs`) is missing from these imports.
class KDEMultivariate(sm.nonparametric.KDEMultivariate):
def __init__(self, configurations):
self.configurations = configurations
data = []
for config in configurations:
data.append(np.array(config.to_list()))
data = np.array(data)
super().__init__(data, configurations[0].kde_vartypes, 'normal_reference')
class Log():
def __init__(self, size):
self.size = size
self.logs = np.empty(self.size, dtype=dict)
self.best = {'loss': np.inf}
def __getitem__(self, index):
return self.logs[index]
def __setitem__(self, index, value):
self.logs[index] = value
def __repr__(self):
string = []
string.append(f's_max: {self.size}')
for s, log in enumerate(self.logs):
string.append(f's: {s}')
for budget in log:
string.append(f'Budget: {budget}')
string.append(f'Loss: {log[budget]["loss"]}')
string.append(str(log[budget]['hyperparameter']))
string.append('Best Hyperparameter Configuration:')
string.append(f'Budget: {self.best["budget"]}')
string.append(f'Loss: {self.best["loss"]}')
string.append(str(self.best['hyperparameter']))
return '\n'.join(string)
class BOHB:
def __init__(self, configspace, evaluate, max_budget, min_budget,
eta=3, best_percent=0.15, random_percent=1/3, n_samples=64,
bw_factor=3, min_bandwidth=1e-3, n_proc=1):
self.eta = eta
self.configspace = configspace
self.max_budget = max_budget
self.min_budget = min_budget
self.evaluate = evaluate
self.best_percent = best_percent
self.random_percent = random_percent
self.n_samples = n_samples
self.min_bandwidth = min_bandwidth
self.bw_factor = bw_factor
self.n_proc = n_proc
self.s_max = int(np.log(self.max_budget/self.min_budget) / np.log(self.eta))
self.budget = (self.s_max + 1) * self.max_budget
self.kde_good = None
self.kde_bad = None
self.samples = np.array([])
def optimize(self):
logs = Log(self.s_max+1)
for s in reversed(range(self.s_max + 1)):
logs[s] = {}
n = int(np.ceil(
(self.budget * (self.eta ** s)) / (self.max_budget * (s + 1))))
r = self.max_budget * (self.eta ** -s)
self.kde_good = None
self.kde_bad = None
self.samples = np.array([])
for i in range(s+1):
n_i = n * self.eta ** (-i) # Number of configs
r_i = r * self.eta ** (i) # Budget
logs[s][r_i] = {'loss': np.inf}
samples = []
losses = []
for j in range(n):
sample = self.get_sample()
if self.n_proc > 1:
loss = dask.delayed(self.evaluate)(sample.to_dict(), int(r_i))
else:
loss = self.evaluate(sample.to_dict(), int(r_i))
samples.append(sample)
losses.append(loss)
if self.n_proc > 1:
losses = dask.compute(
*losses, scheduler='processes', num_workers=self.n_proc)
midx = np.argmin(losses)
logs[s][r_i]['loss'] = losses[midx]
logs[s][r_i]['hyperparameter'] = samples[midx]
if logs[s][r_i]['loss'] < logs.best['loss']:
logs.best['loss'] = logs[s][r_i]['loss']
logs.best['budget'] = r_i
logs.best['hyperparameter'] = logs[s][r_i]['hyperparameter']
n = int(np.ceil(n_i/self.eta))
idxs = np.argsort(losses)
self.samples = np.array(samples)[idxs[:n]]
n_good = int(np.ceil(self.best_percent * len(samples)))
if n_good > len(samples[0].kde_vartypes) + 2:
good_data = np.array(samples)[idxs[:n_good]]
bad_data = np.array(samples)[idxs[n_good:]]
self.kde_good = KDEMultivariate(good_data)
self.kde_bad = KDEMultivariate(bad_data)
self.kde_bad.bw = np.clip(
self.kde_bad.bw, self.min_bandwidth, None)
self.kde_good.bw = np.clip(
self.kde_good.bw, self.min_bandwidth, None)
return logs
def get_sample(self):
if self.kde_good is None or np.random.random() < self.random_percent:
if len(self.samples):
idx = np.random.randint(0, len(self.samples))
sample = self.samples[idx]
self.samples = np.delete(self.samples, idx)
return sample
else:
return self.configspace.sample_configuration()
# Sample from the good data
best_tpe_val = np.inf
for _ in range(self.n_samples):
idx = np.random.randint(0, len(self.kde_good.configurations))
configuration = copy.deepcopy(self.kde_good.configurations[idx])
for hyperparameter, bw in zip(configuration, self.kde_good.bw):
if hyperparameter.type == cs.Type.Continuous:
value = hyperparameter.value
bw = bw * self.bw_factor
hyperparameter.value = scipy.stats.truncnorm.rvs(
-value/bw, (1-value)/bw, loc=value, scale=bw)
elif hyperparameter.type == cs.Type.Discrete:
if np.random.rand() >= (1-bw):
idx = np.random.randint(len(hyperparameter.choices))
hyperparameter.value = idx
else:
raise NotImplementedError
tpe_val = (self.kde_bad.pdf(configuration.to_list()) /
self.kde_good.pdf(configuration.to_list()))
if tpe_val < best_tpe_val:
best_tpe_val = tpe_val
best_configuration = configuration
return best_configuration
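The nested loops in `BOHB.optimize` follow Hyperband's bracket arithmetic: `s_max + 1` brackets, each starting with `n` configurations at budget `r` and promoting the top `1/eta` of survivors to an `eta`-times larger budget per rung. A standalone numpy sketch of that schedule (`hyperband_schedule` is a hypothetical helper; `s_max` is computed with a loop rather than the log ratio to avoid float truncation):

```python
import numpy as np

def hyperband_schedule(min_budget, max_budget, eta=3):
    """Enumerate the (n_configs, budget) rungs of each successive-halving
    bracket, mirroring the rung arithmetic in BOHB.optimize."""
    # Largest s with eta**s <= max_budget / min_budget (no log, so no
    # floating-point truncation for exact powers of eta).
    s_max = 0
    while max_budget / min_budget >= eta ** (s_max + 1):
        s_max += 1
    total_budget = (s_max + 1) * max_budget
    schedule = {}
    for s in reversed(range(s_max + 1)):
        # Initial configuration count and per-config budget for bracket s.
        n = int(np.ceil(total_budget * eta**s / (max_budget * (s + 1))))
        r = max_budget / eta**s
        rungs = []
        n_i, r_i = n, r
        for _ in range(s + 1):
            rungs.append((n_i, r_i))
            n_i = int(np.ceil(n_i / eta))  # survivors promoted to next rung
            r_i *= eta                     # each survivor gets eta x budget
        schedule[s] = rungs
    return schedule

sched = hyperband_schedule(min_budget=1, max_budget=27, eta=3)
```

With `eta=3` and budgets 1..27 this yields four brackets, from the exploratory `[(27, 1), (9, 3), (3, 9), (1, 27)]` down to the exploitative `[(4, 27)]`.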
================================================
FILE: transopt/optimizer/model/deepkernel.py
================================================
"""
This FSBO implementation is based on the original implementation from Hadi Samer Jomaa
for his work on "Transfer Learning for Bayesian HPOBench with End-to-End Landmark Meta-Features"
at the NeurIPS 2021 MetaLearning Workshop
The implementation for Deep Kernel Learning is based on the original Gpytorch example:
https://docs.gpytorch.ai/en/stable/examples/06_PyTorch_NN_Integration_DKL/KISSGP_Deep_Kernel_Regression_CUDA.html
"""
import copy
import logging
import os
import gpytorch
import numpy as np
import torch
import torch.nn as nn
from scipy.optimize import differential_evolution
from transopt.agent.registry import model_registry
np.random.seed(1203)
RandomQueryGenerator= np.random.RandomState(413)
RandomSupportGenerator= np.random.RandomState(413)
RandomTaskGenerator = np.random.RandomState(413)
class Metric(object):
def __init__(self,prefix='train: '):
self.reset()
self.message=prefix + "loss: {loss:.2f} - noise: {log_var:.2f} - mse: {mse:.2f}"
    def update(self, loss, noise, mse):
        # np.asscalar was removed in NumPy 1.23; float() is the replacement.
        self.loss.append(float(loss))
        self.noise.append(float(noise))
        self.mse.append(float(mse))
def reset(self,):
self.loss = []
self.noise = []
self.mse = []
def report(self):
return self.message.format(loss=np.mean(self.loss),
log_var=np.mean(self.noise),
mse=np.mean(self.mse))
def get(self):
return {"loss":np.mean(self.loss),
"noise":np.mean(self.noise),
"mse":np.mean(self.mse)}
def totorch(x,device):
return torch.Tensor(x).to(device)
class MLP(nn.Module):
def __init__(self, input_size, hidden_size=[32,32,32,32], dropout=0.0):
super(MLP, self).__init__()
self.nonlinearity = nn.ReLU()
self.fc = nn.ModuleList([nn.Linear(in_features=input_size, out_features=hidden_size[0])])
for d_out in hidden_size[1:]:
self.fc.append(nn.Linear(in_features=self.fc[-1].out_features, out_features=d_out))
self.out_features = hidden_size[-1]
self.dropout = nn.Dropout(dropout)
def forward(self,x):
for fc in self.fc[:-1]:
x = fc(x)
x = self.dropout(x)
x = self.nonlinearity(x)
x = self.fc[-1](x)
x = self.dropout(x)
return x
class ExactGPLayer(gpytorch.models.ExactGP):
def __init__(self, train_x, train_y, likelihood,config,dims ):
super(ExactGPLayer, self).__init__(train_x, train_y, likelihood)
self.mean_module = gpytorch.means.ConstantMean()
if(config["kernel"]=='rbf' or config["kernel"]=='RBF'):
self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel(ard_num_dims=dims if config["ard"] else None))
elif(config["kernel"]=='matern'):
self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.MaternKernel(nu=config["nu"],ard_num_dims=dims if config["ard"] else None))
else:
            raise ValueError("[ERROR] the kernel '" + str(config["kernel"]) + "' is not supported for regression, use 'rbf' or 'matern'.")
def forward(self, x):
mean_x = self.mean_module(x)
covar_x = self.covar_module(x)
return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
@model_registry.register("DeepKernelGP")
class DeepKernelGP(nn.Module):
    def __init__(self, config=None):
        super(DeepKernelGP, self).__init__()
        # Avoid a mutable default argument; fall back to the default config.
        if not config:
            self.config = {"kernel": "matern", 'ard': False, "nu": 2.5, 'hidden_size': [32,32,32,32], 'n_inner_steps': 1,
                           'test_batch_size': 1, 'batch_size': 1, 'seed': 0, 'eval_batch_size': 1000, 'verbose': True, 'loss_tol': 0.0001,
                           'max_patience': 16, 'lr': 0.001, 'epochs': 100, 'load_model': False, 'checkpoint_path': './external/model/FSBO/Seed_0_1'}
        else:
            self.config = config
torch.manual_seed(self.config['seed'])
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
self.hidden_size = self.config['hidden_size']
self.kernel_config = {"kernel": self.config['kernel'], "ard": self.config['ard'], "nu": self.config['nu']}
self.max_patience = self.config['max_patience']
self.lr = self.config['lr']
self.load_model = self.config['load_model']
self.checkpoint = self.config['checkpoint_path']
self.epochs = self.config['epochs']
self.verbose = self.config['verbose']
self.loss_tol = self.config['loss_tol']
self.eval_batch_size = self.config['eval_batch_size']
self.has_model = False
def get_model_likelihood_mll(self, train_size):
train_x=torch.ones(train_size, self.feature_extractor.out_features).to(self.device)
train_y=torch.ones(train_size).to(self.device)
likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = ExactGPLayer(train_x=train_x, train_y=train_y, likelihood=likelihood, config=self.kernel_config,dims = self.feature_extractor.out_features)
self.model = model.to(self.device)
self.likelihood = likelihood.to(self.device)
self.mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model).to(self.device)
def fit(self,
X: np.ndarray,
Y: np.ndarray,
optimize: bool = False,):
self.X_obs, self.y_obs = totorch(X, self.device), totorch(Y, self.device).reshape(-1)
if self.load_model:
assert(self.checkpoint is not None)
print("Model_loaded")
self.load_checkpoint(os.path.join(self.checkpoint,"weights"))
        if not self.has_model:
self.input_size = X.shape[1]
self.feature_extractor = MLP(self.input_size, hidden_size = self.hidden_size).to(self.device)
self.get_model_likelihood_mll(1)
self.has_model = True
losses = [np.inf]
best_loss = np.inf
weights = copy.deepcopy(self.state_dict())
patience=0
optimizer = torch.optim.Adam([{'params': self.model.parameters(), 'lr': self.lr},
{'params': self.feature_extractor.parameters(), 'lr': self.lr}])
for _ in range(self.epochs):
optimizer.zero_grad()
z = self.feature_extractor(self.X_obs)
self.model.set_train_data(inputs=z, targets=self.y_obs, strict=False)
predictions = self.model(z)
            loss = -self.mll(predictions, self.model.train_targets)
            loss.backward()
            optimizer.step()
if self.verbose:
print("Iter {iter}/{epochs} - Loss: {loss:.5f} noise: {noise:.5f}".format(
iter=_+1,epochs=self.epochs,loss=loss.item(),noise=self.likelihood.noise.item()))
losses.append(loss.detach().to("cpu").item())
if best_loss>losses[-1]:
best_loss = losses[-1]
weights = copy.deepcopy(self.state_dict())
if np.allclose(losses[-1],losses[-2],atol=self.loss_tol):
patience+=1
else:
patience=0
if patience>self.max_patience:
break
self.load_state_dict(weights)
return losses
def load_checkpoint(self, checkpoint):
ckpt = torch.load(checkpoint,map_location=torch.device(self.device))
self.model.load_state_dict(ckpt['gp'],strict=False)
self.likelihood.load_state_dict(ckpt['likelihood'],strict=False)
self.feature_extractor.load_state_dict(ckpt['net'],strict=False)
def predict(self, X_pen):
query_X = totorch(X_pen, self.device)
self.model.eval()
self.feature_extractor.eval()
self.likelihood.eval()
z_support = self.feature_extractor(self.X_obs).detach()
self.model.set_train_data(inputs=z_support, targets=self.y_obs, strict=False)
with torch.no_grad():
z_query = self.feature_extractor(query_X).detach()
pred = self.likelihood(self.model(z_query))
mu = pred.mean.detach().to("cpu").numpy()[: ,np.newaxis]
stddev = pred.stddev.detach().to("cpu").numpy()[: ,np.newaxis]
return mu,stddev
def continuous_maximization( self, dim, bounds, acqf):
result = differential_evolution(acqf, bounds=bounds, updating='immediate',workers=1, maxiter=20000, init="sobol")
return result.x.reshape(-1,dim)
def get_fmin(self):
return np.min(self.y_obs.detach().to("cpu").numpy())
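The training loop in `DeepKernelGP.fit` stops early once the loss plateaus: a patience counter increments whenever consecutive losses agree within `loss_tol` and resets on real progress. A pure-Python replay of that rule (`run_with_patience` is a hypothetical name; the absolute-tolerance check approximates the `np.allclose` call in `fit`):

```python
import math

def run_with_patience(losses, loss_tol=1e-4, max_patience=16):
    """Replay the early-stopping rule from DeepKernelGP.fit and return
    the number of epochs actually run."""
    history = [math.inf]  # fit seeds its loss history with infinity
    patience = 0
    for epoch, loss in enumerate(losses):
        history.append(loss)
        if abs(history[-1] - history[-2]) <= loss_tol:
            patience += 1   # loss barely moved: one more strike
        else:
            patience = 0    # real progress: reset the counter
        if patience > max_patience:
            return epoch + 1
    return len(losses)
```

A flat loss sequence is cut off after `max_patience + 1` plateau epochs, while a steadily decreasing one runs to completion.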
================================================
FILE: transopt/optimizer/model/dyhpo.py
================================================
import logging
import os
from copy import deepcopy
from typing import Dict, Tuple
import gpytorch
import numpy as np
import torch
import torch.nn as nn
from torch import cat
from transopt.agent.registry import model_registry
class FeatureExtractor(nn.Module):
"""
The feature extractor that is part of the deep kernel.
"""
def __init__(self, configuration):
super(FeatureExtractor, self).__init__()
self.configuration = configuration
self.nr_layers = configuration['nr_layers']
self.act_func = nn.LeakyReLU()
# adding one to the dimensionality of the initial input features
# for the concatenation with the budget.
initial_features = configuration['nr_initial_features'] + 1
self.fc1 = nn.Linear(initial_features, configuration['layer1_units'])
self.bn1 = nn.BatchNorm1d(configuration['layer1_units'])
for i in range(2, self.nr_layers):
setattr(
self,
f'fc{i + 1}',
nn.Linear(configuration[f'layer{i - 1}_units'], configuration[f'layer{i}_units']),
)
setattr(
self,
f'bn{i + 1}',
nn.BatchNorm1d(configuration[f'layer{i}_units']),
)
setattr(
self,
f'fc{self.nr_layers}',
nn.Linear(
configuration[f'layer{self.nr_layers - 1}_units'] +
configuration['cnn_nr_channels'], # accounting for the learning curve features
configuration[f'layer{self.nr_layers}_units']
),
)
self.cnn = nn.Sequential(
nn.Conv1d(in_channels=1, kernel_size=(configuration['cnn_kernel_size'],), out_channels=4),
nn.AdaptiveMaxPool1d(1),
)
def forward(self, x, budgets, learning_curves):
# add an extra dimensionality for the budget
# making it nr_rows x 1.
budgets = torch.unsqueeze(budgets, dim=1)
# concatenate budgets with examples
x = cat((x, budgets), dim=1)
x = self.fc1(x)
x = self.act_func(self.bn1(x))
for i in range(2, self.nr_layers):
x = self.act_func(
getattr(self, f'bn{i}')(
getattr(self, f'fc{i}')(
x
)
)
)
# add an extra dimensionality for the learning curve
# making it nr_rows x 1 x lc_values.
learning_curves = torch.unsqueeze(learning_curves, 1)
lc_features = self.cnn(learning_curves)
# revert the output from the cnn into nr_rows x nr_kernels.
lc_features = torch.squeeze(lc_features, 2)
# put learning curve features into the last layer along with the higher level features.
x = cat((x, lc_features), dim=1)
x = self.act_func(getattr(self, f'fc{self.nr_layers}')(x))
return x
class GPRegressionModel(gpytorch.models.ExactGP):
"""
A simple GP model.
"""
def __init__(
self,
train_x: torch.Tensor,
train_y: torch.Tensor,
likelihood: gpytorch.likelihoods.GaussianLikelihood,
):
"""
Constructor of the GPRegressionModel.
Args:
train_x: The initial train examples for the GP.
train_y: The initial train labels for the GP.
likelihood: The likelihood to be used.
"""
super(GPRegressionModel, self).__init__(train_x, train_y, likelihood)
self.mean_module = gpytorch.means.ConstantMean()
self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())
def forward(self, x):
mean_x = self.mean_module(x)
covar_x = self.covar_module(x)
return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
class DyHPO:
"""
The DyHPO DeepGP model.
"""
def __init__(
self,
configuration: Dict,
device: torch.device,
dataset_name: str = 'unknown',
output_path: str = '.',
seed: int = 11,
):
"""
The constructor for the DyHPO model.
Args:
configuration: The configuration to be used
for the different parts of the surrogate.
device: The device where the experiments will be run on.
dataset_name: The name of the dataset for the current run.
output_path: The path where the intermediate/final results
will be stored.
seed: The seed that will be used to store the checkpoint
properly.
"""
super(DyHPO, self).__init__()
self.feature_extractor = FeatureExtractor(configuration)
self.batch_size = configuration['batch_size']
self.nr_epochs = configuration['nr_epochs']
self.early_stopping_patience = configuration['nr_patience_epochs']
self.refine_epochs = 50
self.dev = device
self.seed = seed
self.model, self.likelihood, self.mll = \
self.get_model_likelihood_mll(
configuration[f'layer{self.feature_extractor.nr_layers}_units']
)
self.model.to(self.dev)
self.likelihood.to(self.dev)
self.feature_extractor.to(self.dev)
self.optimizer = torch.optim.Adam([
{'params': self.model.parameters(), 'lr': configuration['learning_rate']},
{'params': self.feature_extractor.parameters(), 'lr': configuration['learning_rate']}],
)
self.configuration = configuration
# the number of initial points for which we will retrain fully from scratch
# This is basically equal to the dimensionality of the search space + 1.
self.initial_nr_points = 10
# keeping track of the total hpo iterations. It will be used during the optimization
# process to switch from fully training the model, to refining.
self.iterations = 0
# flag for when the optimization of the model should start from scratch.
self.restart = True
self.logger = logging.getLogger(__name__)
self.checkpoint_path = os.path.join(
output_path,
'checkpoints',
f'{dataset_name}',
f'{self.seed}',
)
os.makedirs(self.checkpoint_path, exist_ok=True)
self.checkpoint_file = os.path.join(
self.checkpoint_path,
'checkpoint.pth'
)
def restart_optimization(self):
"""
Restart the surrogate model from scratch.
"""
self.feature_extractor = FeatureExtractor(self.configuration).to(self.dev)
self.model, self.likelihood, self.mll = \
self.get_model_likelihood_mll(
self.configuration[f'layer{self.feature_extractor.nr_layers}_units'],
)
self.optimizer = torch.optim.Adam([
{'params': self.model.parameters(), 'lr': self.configuration['learning_rate']},
{'params': self.feature_extractor.parameters(), 'lr': self.configuration['learning_rate']}],
)
def get_model_likelihood_mll(
self,
train_size: int,
) -> Tuple[GPRegressionModel, gpytorch.likelihoods.GaussianLikelihood, gpytorch.mlls.ExactMarginalLogLikelihood]:
"""
Called when the surrogate is first initialized or restarted.
Args:
train_size: The size of the current training set.
Returns:
model, likelihood, mll - The GP model, the likelihood and
the marginal likelihood.
"""
train_x = torch.ones(train_size, train_size).to(self.dev)
train_y = torch.ones(train_size).to(self.dev)
likelihood = gpytorch.likelihoods.GaussianLikelihood().to(self.dev)
model = GPRegressionModel(train_x=train_x, train_y=train_y, likelihood=likelihood).to(self.dev)
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model).to(self.dev)
return model, likelihood, mll
def train_pipeline(self, data: Dict[str, torch.Tensor], load_checkpoint: bool = False):
"""
Train the surrogate model.
Args:
data: A dictionary which has the training examples, training features,
training budgets and in the end the training curves.
load_checkpoint: A flag whether to load the state from a previous checkpoint,
or whether to start from scratch.
"""
self.iterations += 1
self.logger.debug(f'Starting iteration: {self.iterations}')
# whether the state has been changed. Basically, if a better loss was found during
# this optimization iteration then the state (weights) were changed.
weights_changed = False
if load_checkpoint:
try:
self.load_checkpoint()
except FileNotFoundError:
                self.logger.error(f'No checkpoint file found at: {self.checkpoint_file}. '
                                  f'Training the GP from the beginning.')
self.model.train()
self.likelihood.train()
self.feature_extractor.train()
self.optimizer = torch.optim.Adam([
{'params': self.model.parameters(), 'lr': self.configuration['learning_rate']},
{'params': self.feature_extractor.parameters(), 'lr': self.configuration['learning_rate']}],
)
X_train = data['X_train']
train_budgets = data['train_budgets']
train_curves = data['train_curves']
y_train = data['y_train']
initial_state = self.get_state()
training_errored = False
if self.restart:
self.restart_optimization()
nr_epochs = self.nr_epochs
# 2 cases where the statement below is hit.
# - We are switching from the full training phase in the beginning to refining.
# - We are restarting because our refining diverged
if self.initial_nr_points <= self.iterations:
self.restart = False
else:
nr_epochs = self.refine_epochs
# where the mean squared error will be stored
# when predicting on the train set
mse = 0.0
for epoch_nr in range(0, nr_epochs):
nr_examples_batch = X_train.size(dim=0)
# if only one example in the batch, skip the batch.
# Otherwise, the code will fail because of batchnorm
if nr_examples_batch == 1:
continue
# Zero backprop gradients
self.optimizer.zero_grad()
projected_x = self.feature_extractor(X_train, train_budgets, train_curves)
self.model.set_train_data(projected_x, y_train, strict=False)
output = self.model(projected_x)
try:
# Calc loss and backprop derivatives
loss = -self.mll(output, self.model.train_targets)
loss_value = loss.detach().to('cpu').item()
mse = gpytorch.metrics.mean_squared_error(output, self.model.train_targets)
self.logger.debug(
f'Epoch {epoch_nr} - MSE {mse:.5f}, '
f'Loss: {loss_value:.3f}, '
f'lengthscale: {self.model.covar_module.base_kernel.lengthscale.item():.3f}, '
f'noise: {self.model.likelihood.noise.item():.3f}, '
)
loss.backward()
self.optimizer.step()
except Exception as training_error:
self.logger.error(f'The following error happened while training: {training_error}')
# An error has happened, trigger the restart of the optimization and restart
# the model with default hyperparameters.
self.restart = True
training_errored = True
break
"""
# metric too high, time to restart, or we risk divergence
if mse > 0.15:
if not self.restart:
self.restart = True
"""
if training_errored:
self.save_checkpoint(initial_state)
self.load_checkpoint()
def predict_pipeline(
self,
train_data: Dict[str, torch.Tensor],
test_data: Dict[str, torch.Tensor],
) -> Tuple[np.ndarray, np.ndarray]:
"""
Args:
train_data: A dictionary that has the training
examples, features, budgets and learning curves.
test_data: Same as for the training data, but it is
for the testing part and it does not feature labels.
Returns:
means, stds: The means of the predictions for the
testing points and the standard deviations.
"""
self.model.eval()
self.feature_extractor.eval()
self.likelihood.eval()
with torch.no_grad(): # gpytorch.settings.fast_pred_var():
projected_train_x = self.feature_extractor(
train_data['X_train'],
train_data['train_budgets'],
train_data['train_curves'],
)
self.model.set_train_data(inputs=projected_train_x, targets=train_data['y_train'], strict=False)
projected_test_x = self.feature_extractor(
test_data['X_test'],
test_data['test_budgets'],
test_data['test_curves'],
)
preds = self.likelihood(self.model(projected_test_x))
means = preds.mean.detach().to('cpu').numpy().reshape(-1, )
stds = preds.stddev.detach().to('cpu').numpy().reshape(-1, )
return means, stds
def load_checkpoint(self):
"""
Load the state from a previous checkpoint.
"""
checkpoint = torch.load(self.checkpoint_file)
self.model.load_state_dict(checkpoint['gp_state_dict'])
self.feature_extractor.load_state_dict(checkpoint['feature_extractor_state_dict'])
self.likelihood.load_state_dict(checkpoint['likelihood_state_dict'])
def save_checkpoint(self, state: Dict =None):
"""
Save the given state or the current state in a
checkpoint file.
Args:
state: The state to save, if none, it will
save the current state.
"""
if state is None:
torch.save(
self.get_state(),
self.checkpoint_file,
)
else:
torch.save(
state,
self.checkpoint_file,
)
def get_state(self) -> Dict[str, Dict]:
"""
Get the current state of the surrogate.
Returns:
current_state: A dictionary that represents
the current state of the surrogate model.
"""
current_state = {
'gp_state_dict': deepcopy(self.model.state_dict()),
'feature_extractor_state_dict': deepcopy(self.feature_extractor.state_dict()),
'likelihood_state_dict': deepcopy(self.likelihood.state_dict()),
}
return current_state
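`DyHPO.train_pipeline` switches between two regimes: while fewer than `initial_nr_points` HPO iterations have run (or after a training error sets `self.restart`), the surrogate is rebuilt and trained for the full `nr_epochs`; afterwards it is only refined for `refine_epochs`. A dependency-free sketch of that switching logic (`TrainScheduler` and its defaults are hypothetical, isolating just the flag handling):

```python
class TrainScheduler:
    """Mirror the full-training vs. refinement switch in DyHPO.train_pipeline."""
    def __init__(self, initial_nr_points=10, nr_epochs=1000, refine_epochs=50):
        self.initial_nr_points = initial_nr_points
        self.nr_epochs = nr_epochs
        self.refine_epochs = refine_epochs
        self.iterations = 0
        self.restart = True  # start by training from scratch

    def next_epochs(self, training_errored=False):
        self.iterations += 1
        if self.restart:
            epochs = self.nr_epochs  # rebuild and train fully
            # Once enough points are observed, switch to cheap refinement.
            if self.initial_nr_points <= self.iterations:
                self.restart = False
        else:
            epochs = self.refine_epochs
        if training_errored:
            self.restart = True  # diverged: rebuild on the next call
        return epochs
```

So the first few surrogate fits are expensive full trainings, after which each new observation triggers only a short refinement unless training fails.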
================================================
FILE: transopt/optimizer/model/get_model.py
================================================
from transopt.agent.registry import model_registry
def get_model(model_name, **kwargs):
"""Create the optimizer object."""
    model_class = model_registry.get(model_name)
    if model_class is None:
        raise NameError(f"Model '{model_name}' not found in the registry.")
    return model_class(config=kwargs)
================================================
FILE: transopt/optimizer/model/gp.py
================================================
import copy
import numpy as np
from typing import Tuple, List
from sklearn.preprocessing import StandardScaler
from GPy.models import GPRegression
from GPy.kern import RBF, Kern, Matern32
from transopt.optimizer.model.model_base import Model
from transopt.optimizer.model.utils import is_pd, nearest_pd
from transopt.agent.registry import model_registry
@model_registry.register('GP')
class GP(Model):
def __init__(
self,
kernel: Kern = None,
noise_variance: float = 1.0,
normalize = False,
**options: dict
):
"""Initialize the Method.
Args:
kernel: The type of kernel of the GP. Defaults to squared exponential
without automatic relevance determination.
noise_variance: The variance of the observation noise.
normalize: Train the model on normalized (`=True`) or original (`=False`)
data.
**options: Training arguments for `GPy.models.GPRegression`.
"""
super().__init__()
        self._kernel = kernel
self._noise_variance = np.array(noise_variance)
self._gpy_model = None
self._options = options
@property
def kernel(self):
"""Return GPy kernel in the normalized space."""
return self._kernel
@property
def noise_variance(self):
"""Return noise variance."""
return self._noise_variance
@kernel.setter
def kernel(self, kernel: Kern):
"""Assign a new kernel to the GP.
Args:
kernel: the new kernel to be assigned.
"""
self._kernel = kernel.copy()
if self._gpy_model:
# remove the old kernel from being a parameter of `gpy_model`
self._gpy_model.unlink_parameter(self._gpy_model.kern)
del self._gpy_model.kern
self._gpy_model.kern = kernel # assign new kernel
# add the new kernel to the param class
self._gpy_model.link_parameter(kernel)
# re-cache the relevant quantities of the model
self._gpy_model.parameters_changed()
def meta_fit(
self,
source_X : List[np.ndarray],
source_Y : List[np.ndarray],
**kwargs,
):
pass
def fit(
self,
X : np.ndarray,
Y : np.ndarray,
optimize: bool = False,
):
self._X = np.copy(X)
self._y = np.copy(Y)
self._Y = np.copy(Y)
_X = np.copy(self._X)
_y = np.copy(self._y)
        if self._gpy_model is None:
            if self._kernel is None:
                self._kernel = Matern32(input_dim=_X.shape[1])
self._gpy_model = GPRegression(
_X, _y, self._kernel, noise_var=self._noise_variance
)
else:
self._gpy_model.set_XY(_X, _y)
if optimize:
optimize_restarts_options = self._options.get(
"optimize_restarts_options", {}
)
kwargs = copy.deepcopy(optimize_restarts_options)
if "verbose" not in optimize_restarts_options:
kwargs["verbose"] = False
kwargs["messages"] = False
kwargs["optimizer"]='lbfgs'
kwargs["max_iters"] = 2000
try:
self._gpy_model.optimize_restarts(num_restarts=3, **kwargs)
            except np.linalg.LinAlgError:
                print("Warning: hyperparameter optimization failed with "
                      "LinAlgError; keeping current parameters.")
# self._kernel = self._gpy_model.kern.copy()
# self._noise_variance = self._gpy_model.likelihood.variance.values
def predict(
self, X: np.ndarray, return_full: bool = False, with_noise: bool = False
) -> Tuple[np.ndarray, np.ndarray]:
mean, var = self._raw_predict(X, return_full, with_noise)
        return mean, var
def _raw_predict(
self, X: np.ndarray, return_full: bool = False, with_noise: bool = False
) -> Tuple[np.ndarray, np.ndarray]:
"""Predict functions distribution(s) for given test point(s) without taking into
account data normalization. If `self._normalize` is `False`, return the same as
`self.predict()`.
Same input/output as `self.predict()`.
"""
_X_test = X.copy()
if self.X is None:
mu = np.zeros((_X_test.shape[0], 1))
cov = self._kernel.K(_X_test)
var = np.diag(cov)[:, None]
            return mu, (cov if return_full else var)
# ensure that no negative variance is predicted
mu, cov = self._gpy_model.predict(
_X_test, full_cov=return_full, include_likelihood=with_noise
)
if return_full:
if not is_pd(cov):
cov = nearest_pd(cov)
else:
cov = np.clip(cov, 1e-20, None)
return mu, cov
def predict_posterior_mean(self, X) -> np.ndarray:
r"""Perform model inference.
Predict the posterior mean of the latent distribution `f` for given test points.
Achieves the same as `self.predict(data)[0]` but is much faster.
Scales as $\mathcal{O}(n)$, where $n$ is the number of training points. Useful
when the (co-)variance prediction is not needed. Computing the latter scales as
$\mathcal{O}(n^2)$.
Args:
data: Input data to predict on. `shape = (n_points, n_features)`
Returns:
The mean prediction. `shape = (n_points, 1)`
"""
_x = X.copy()
if self._X is None:
return np.zeros(_x.shape)
_X = self._X.copy()
mu = self._kernel.K(_x, _X) @ self._gpy_model.posterior.woodbury_vector
return mu
def predict_posterior_covariance(self, x1, x2) -> np.ndarray:
"""Perform model inference.
Predict the posterior covariance between `(x1, x2)` of the latent distribution
`f`. In case `x1 == x2`, achieves the same as
`self.predict(x1, return_full=True)[1]`.
Args:
x1: Input data to predict on. `shape = (n_points_1, n_features)`
x2: Input data to predict on. `shape = (n_points_2, n_features)`
Returns:
Predicted covariance for every input. `shape = (n_points_1, n_points_2)`
"""
_X1 = x1.copy()
_X2 = x2.copy()
if self._X is None:
cov = self._kernel.K(_X1, _X2)
return cov
cov = self._gpy_model.posterior_covariance_between_points(
_X1, _X2, include_likelihood=False
)
return cov
def compute_kernel(self, x1, x2) -> np.ndarray:
"""Evaluate the kernel matrix for desired input points.
Wrapper around `self.kernel.K()` that takes care of normalization and allows
for prediction of empty GP.
Args:
x1: First input to be queried. `shape = (n_points_1, n_features)`
x2: Second input to be queried. `shape = (n_points_2, n_features)`
Returns:
Kernel values at `(x1, x2)`. `shape = (n_points_1, n_points_2)`
"""
_x1, _x2 = np.copy(x1), np.copy(x2)
return self._kernel.K(_x1, _x2)
def compute_kernel_diagonal(self, X) -> np.ndarray:
"""Evaluate diagonal of kernel matrix for desired input points.
Much faster than `compute_kernel()` in case only the diagonal is needed.
Wrapper around `self.kernel.Kdiag()` that takes care of normalization and
allows for prediction of empty GP.
Args:
data: Input to be queried. `shape = (n_points, n_features)`
Returns:
Kernel diagonal. `shape = (n_points, 1)`
"""
_x = np.copy(X)
return self._kernel.Kdiag(_x).reshape(-1, 1)
def sample(
self, X, size: int = 1, with_noise: bool = False
) -> np.ndarray:
"""Perform model inference.
Sample functions from the posterior distribution for the given test points.
Args:
data: Input data to predict on. `shape = (n_points, n_features)`
size: Number of functions to sample.
with_noise: If `False`, the latent function `f` is considered. If `True`,
the observed function `y` that includes the noise variance is
considered.
Returns:
Sampled function value for every input. `shape = (n_points, size)`
"""
mean, cov = self.predict(X, return_full=True, with_noise=with_noise)
mean = mean.flatten()
sample = np.random.multivariate_normal(mean, cov, size).T
return sample
def get_fmin(self):
return np.min(self._y)
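`predict_posterior_mean` above is fast because the posterior mean is just `K(X*, X) @ woodbury_vector`, where the Woodbury vector `alpha = (K(X, X) + sigma^2 I)^{-1} y` is cached by GPy after fitting. A minimal numpy sketch of that O(n)-per-test-point idea, with a hypothetical RBF kernel standing in for the GPy `Kern` object:

```python
import numpy as np

def rbf_kernel(A, B, lengthscale=1.0, variance=1.0):
    """Squared-exponential kernel matrix between row-vector inputs A and B."""
    sq = ((A[:, None, :] - B[None, :, :]) ** 2).sum(-1)
    return variance * np.exp(-0.5 * sq / lengthscale ** 2)

def fit_alpha(X, y, noise=1e-2):
    """Cache alpha = (K + noise*I)^{-1} y, the analogue of GPy's woodbury_vector."""
    K = rbf_kernel(X, X)
    return np.linalg.solve(K + noise * np.eye(len(X)), y)

def posterior_mean(X_test, X, alpha):
    """Mean prediction k(x*, X) @ alpha; no covariance is computed."""
    return rbf_kernel(X_test, X) @ alpha

rng = np.random.default_rng(0)
X = rng.uniform(-2, 2, size=(30, 1))
y = np.sin(X)
alpha = fit_alpha(X, y)
mu = posterior_mean(X, X, alpha)  # predictions at the training inputs
```

Because `alpha` depends only on the training data, each new mean query costs one kernel-vector product instead of the full `O(n^2)` variance computation.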
================================================
FILE: transopt/optimizer/model/hebo.py
================================================
# Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved.
# This program is free software; you can redistribute it and/or modify it under
# the terms of the MIT license.
# This program is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the MIT License for more details.
import sys
from typing import Optional
import numpy as np
import pandas as pd
import torch
from copy import deepcopy
from torch.quasirandom import SobolEngine
from sklearn.preprocessing import power_transform
from external.hebo.design_space.design_space import DesignSpace
from external.hebo.models.model_factory import get_model
from external.hebo.acquisitions.acq import MACE, Mean, Sigma
from external.hebo.acq_optimizers.evolution_optimizer import EvolutionOpt
from .abstract_optimizer import AbstractOptimizer
torch.set_num_threads(min(1, torch.get_num_threads()))
class HEBO(AbstractOptimizer):
support_parallel_opt = True
support_combinatorial = True
support_contextual = True
def __init__(self, space, model_name = 'gpy', rand_sample = None, acq_cls = MACE, es = 'nsga2', model_config = None,
scramble_seed: Optional[int] = None ):
"""
model_name : surrogate model to be used
rand_sample : iterations to perform random sampling
scramble_seed : seed used for the sobol sampling of the first initial points
"""
super().__init__(space)
self.space = space
self.es = es
self.X = pd.DataFrame(columns = self.space.para_names)
self.y = np.zeros((0, 1))
self.model_name = model_name
self.rand_sample = 1 + self.space.num_paras if rand_sample is None else max(2, rand_sample)
self.scramble_seed = scramble_seed
self.sobol = SobolEngine(self.space.num_paras, scramble = True, seed = scramble_seed)
self.acq_cls = acq_cls
self._model_config = model_config
def quasi_sample(self, n, fix_input = None):
samp = self.sobol.draw(n)
samp = samp * (self.space.opt_ub - self.space.opt_lb) + self.space.opt_lb
x = samp[:, :self.space.num_numeric]
xe = samp[:, self.space.num_numeric:]
        for i, name in enumerate(self.space.numeric_names):
            if self.space.paras[name].is_discrete_after_transform:
                x[:, i] = x[:, i].round()
df_samp = self.space.inverse_transform(x, xe)
if fix_input is not None:
for k, v in fix_input.items():
df_samp[k] = v
return df_samp
@property
def model_config(self):
if self._model_config is None:
if self.model_name == 'gp':
cfg = {
'lr' : 0.01,
'num_epochs' : 100,
'verbose' : False,
'noise_lb' : 8e-4,
'pred_likeli' : False
}
elif self.model_name == 'gpy':
cfg = {
'verbose' : False,
'warp' : True,
'space' : self.space
}
elif self.model_name == 'gpy_mlp':
cfg = {
'verbose' : False
}
elif self.model_name == 'rf':
cfg = {
'n_estimators' : 20
}
else:
cfg = {}
else:
cfg = deepcopy(self._model_config)
if self.space.num_categorical > 0:
cfg['num_uniqs'] = [len(self.space.paras[name].categories) for name in self.space.enum_names]
return cfg
def get_best_id(self, fix_input : dict = None) -> int:
if fix_input is None:
return np.argmin(self.y.reshape(-1))
X = self.X.copy()
y = self.y.copy()
for k, v in fix_input.items():
if X[k].dtype != 'float':
crit = (X[k] != v).values
else:
crit = ((X[k] - v).abs() > np.finfo(float).eps).values
y[crit] = np.inf
if np.isfinite(y).any():
return np.argmin(y.reshape(-1))
else:
return np.argmin(self.y.reshape(-1))
def suggest(self, n_suggestions=1, fix_input = None):
if self.acq_cls != MACE and n_suggestions != 1:
raise RuntimeError('Parallel optimization is supported only for MACE acquisition')
if self.X.shape[0] < self.rand_sample:
sample = self.quasi_sample(n_suggestions, fix_input)
return sample
else:
X, Xe = self.space.transform(self.X)
try:
if self.y.min() <= 0:
y = torch.FloatTensor(power_transform(self.y / self.y.std(), method = 'yeo-johnson'))
else:
y = torch.FloatTensor(power_transform(self.y / self.y.std(), method = 'box-cox'))
if y.std() < 0.5:
y = torch.FloatTensor(power_transform(self.y / self.y.std(), method = 'yeo-johnson'))
if y.std() < 0.5:
raise RuntimeError('Power transformation failed')
model = get_model(self.model_name, self.space.num_numeric, self.space.num_categorical, 1, **self.model_config)
model.fit(X, Xe, y)
            except Exception:
y = torch.FloatTensor(self.y).clone()
model = get_model(self.model_name, self.space.num_numeric, self.space.num_categorical, 1, **self.model_config)
model.fit(X, Xe, y)
best_id = self.get_best_id(fix_input)
best_x = self.X.iloc[[best_id]]
best_y = y.min()
py_best, ps2_best = model.predict(*self.space.transform(best_x))
py_best = py_best.detach().numpy().squeeze()
ps_best = ps2_best.sqrt().detach().numpy().squeeze()
iter = max(1, self.X.shape[0] // n_suggestions)
upsi = 0.5
delta = 0.01
# kappa = np.sqrt(upsi * 2 * np.log(iter ** (2.0 + self.X.shape[1] / 2.0) * 3 * np.pi**2 / (3 * delta)))
kappa = np.sqrt(upsi * 2 * ((2.0 + self.X.shape[1] / 2.0) * np.log(iter) + np.log(3 * np.pi**2 / (3 * delta))))
acq = self.acq_cls(model, best_y = py_best, kappa = kappa) # LCB < py_best
mu = Mean(model)
sig = Sigma(model, linear_a = -1.)
opt = EvolutionOpt(self.space, acq, pop = 100, iters = 100, verbose = False, es=self.es)
rec = opt.optimize(initial_suggest = best_x, fix_input = fix_input).drop_duplicates()
rec = rec[self.check_unique(rec)]
cnt = 0
while rec.shape[0] < n_suggestions:
rand_rec = self.quasi_sample(n_suggestions - rec.shape[0], fix_input)
rand_rec = rand_rec[self.check_unique(rand_rec)]
rec = pd.concat([rec, rand_rec], axis = 0, ignore_index = True)
cnt += 1
if cnt > 3:
# sometimes the design space is so small that duplicated sampling is unavoidable
break
if rec.shape[0] < n_suggestions:
rand_rec = self.quasi_sample(n_suggestions - rec.shape[0], fix_input)
                rec = pd.concat([rec, rand_rec], axis = 0, ignore_index = True)
select_id = np.random.choice(rec.shape[0], n_suggestions, replace = False).tolist()
x_guess = []
with torch.no_grad():
py_all = mu(*self.space.transform(rec)).squeeze().numpy()
ps_all = -1 * sig(*self.space.transform(rec)).squeeze().numpy()
best_pred_id = np.argmin(py_all)
best_unce_id = np.argmax(ps_all)
if best_unce_id not in select_id and n_suggestions > 2:
select_id[0]= best_unce_id
if best_pred_id not in select_id and n_suggestions > 2:
select_id[1]= best_pred_id
rec_selected = rec.iloc[select_id].copy()
return rec_selected
    def check_unique(self, rec : pd.DataFrame) -> list[bool]:
return (~pd.concat([self.X, rec], axis = 0).duplicated().tail(rec.shape[0]).values).tolist()
def observe(self, X, y):
"""Feed an observation back.
Parameters
----------
X : pandas DataFrame
Places where the objective function has already been evaluated.
Each suggestion is a dictionary where each key corresponds to a
parameter being optimized.
y : array-like, shape (n,1)
Corresponding values where objective has been evaluated
"""
valid_id = np.where(np.isfinite(y.reshape(-1)))[0].tolist()
XX = X.iloc[valid_id]
yy = y[valid_id].reshape(-1, 1)
self.X = pd.concat([self.X, XX], axis = 0, ignore_index = True)
self.y = np.vstack([self.y, yy])
@property
def best_x(self)->pd.DataFrame:
if self.X.shape[0] == 0:
raise RuntimeError('No data has been observed!')
else:
return self.X.iloc[[self.y.argmin()]]
@property
def best_y(self)->float:
if self.X.shape[0] == 0:
raise RuntimeError('No data has been observed!')
else:
return self.y.min()
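The `kappa` used by `suggest` above is a GP-UCB-style exploration weight that grows logarithmically with the iteration count. The schedule can be isolated into a small helper (a sketch; `upsi` and `delta` keep their names and defaults from the code above):

```python
import numpy as np

def ucb_kappa(t, dim, upsi=0.5, delta=0.01):
    """Numerically stable form of the schedule used above:
    kappa = sqrt(2*upsi * ((2 + dim/2) * log t + log(3*pi^2 / (3*delta))))."""
    t = max(1, t)
    return np.sqrt(upsi * 2 * ((2.0 + dim / 2.0) * np.log(t)
                               + np.log(3 * np.pi ** 2 / (3 * delta))))

# exploration weight grows slowly (logarithmically) with the iteration count
print(ucb_kappa(1, dim=4), ucb_kappa(100, dim=4))
```

Writing the formula as a sum of logarithms, as the uncommented line in `suggest` does, avoids overflowing `iter ** (2 + d/2)` for large iteration counts.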
================================================
FILE: transopt/optimizer/model/mhgp.py
================================================
# Copyright (c) 2021 Robert Bosch GmbH
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see .
import copy
import numpy as np
from typing import Dict, Hashable, Union, Sequence, Tuple, List
from GPy.kern import RBF
from GPy.kern import Kern, RBF
from transopt.optimizer.model.gp import GP
from transopt.optimizer.model.model_base import Model
from transopt.agent.registry import model_registry
@model_registry.register("MHGP")
class MHGP(Model):
"""Stack of Gaussian processes.
Transfer Learning model based on [Golovin et al: Google Vizier: A Service for
Black-Box Optimization](https://dl.acm.org/doi/abs/10.1145/3097983.3098043).
Given a list of source data sets, the
transfer to the target data set is done by training a separate GP for each data set
whose prior mean function is the posterior mean function of the previous GP in the
stack.
"""
def __init__(self,
kernel: Kern = None,
noise_variance: float = 1.0,
normalize: bool = True,
**options: dict):
        """Initialize the Method.
        Args:
            kernel: Kernel used by each GP in the stack.
            noise_variance: The variance of the observation noise.
            normalize: Normalize each GP internally. Helpful for numerical
                stability.
        """
super().__init__()
self._normalize = normalize
self._kernel = kernel
self._noise_variance = noise_variance
self.n_samples = 0
self.source_gps = []
# GP on difference between target data and last source data set
self.target_gp = None
def _compute_residuals(self, X: np.ndarray, Y: np.ndarray) -> np.ndarray:
"""Determine the difference between given y-values and the sum of predicted
values from the models in 'source_gps'.
        Args:
            X: Input data. `shape = (n_points, n_features)`
            Y: Target data. `shape = (n_points, 1)`
Returns:
Difference between observed values and sum of predicted values
from `source_gps`. `shape = (n_points, 1)`
"""
if self.n_features != X.shape[1]:
raise ValueError("Number of features in model and input data mismatch.")
if not self.source_gps:
return Y
predicted_y = self.predict_posterior_mean(
X, idx=len(self.source_gps) - 1
)
residuals = Y - predicted_y
return residuals
def _update_meta_data(self, *gps: GP):
"""Cache the meta data after meta training."""
for gp in gps:
self.source_gps.append(gp)
def _meta_fit_single_gp(
self,
X : np.ndarray,
Y : np.ndarray,
optimize: bool,
) -> GP:
"""Train a new source GP on `data`.
Args:
data: The source dataset.
optimize: Switch to run hyperparameter optimization.
Returns:
The newly trained GP.
"""
self.n_features = X.shape[1]
residuals = self._compute_residuals(X, Y)
kernel = RBF(self.n_features, ARD=True)
new_gp = GP(
kernel, noise_variance=self._noise_variance
)
new_gp.fit(
X = X,
Y = residuals,
optimize = optimize,
)
return new_gp
def meta_fit(
self,
source_X : List[np.ndarray],
source_Y : List[np.ndarray],
optimize: Union[bool, Sequence[bool]] = True,
):
        """Train the source GPs on the given source data.
        Args:
            source_X: List of source input arrays, one per source task.
            source_Y: List of source target arrays, one per source task. The
                stack of GPs is trained on the residuals between consecutive
                data sets in this list.
            optimize: Switch to run hyperparameter optimization.
        """
assert isinstance(optimize, bool) or isinstance(optimize, list)
if isinstance(optimize, list):
assert len(source_X) == len(optimize)
optimize_flag = copy.copy(optimize)
if isinstance(optimize_flag, bool):
optimize_flag = [optimize_flag] * len(source_X)
for i in range(len(source_X)):
new_gp = self._meta_fit_single_gp(
source_X[i],
source_Y[i],
optimize=optimize_flag[i],
)
self._update_meta_data(new_gp)
def fit(
self,
X: np.ndarray,
Y: np.ndarray,
optimize: bool = False,
):
if not self.source_gps:
raise ValueError(
"Error: source gps are not trained. Forgot to call `meta_fit`."
)
self._X = copy.deepcopy(X)
self._y = copy.deepcopy(Y)
self.n_samples, n_features = self._X.shape
if self.n_features != n_features:
raise ValueError("Number of features in model and input data mismatch.")
if self.target_gp is None:
self.target_gp = GP(
RBF(self.n_features, ARD=True),
noise_variance=0.1,
)
residuals = self._compute_residuals(X, Y)
self.target_gp.fit(X, residuals, optimize)
def predict(
self, X: np.ndarray, return_full: bool = False, with_noise: bool = False
) -> Tuple[np.ndarray, np.ndarray]:
if not self.source_gps:
raise ValueError(
"Error: source gps are not trained. Forgot to call `meta_fit`."
)
# returned mean: sum of means of the predictions of all source and target GPs
mu = self.predict_posterior_mean(X)
# returned variance is the variance of target GP
_, var = self.target_gp.predict(
X, return_full=return_full, with_noise=with_noise
)
return mu, var
def predict_posterior_mean(self, X: np.ndarray, idx: int = None) -> np.ndarray:
"""Predict the mean function for given test point(s).
For `idx=None` returns the same as `self.predict(data)[0]` but avoids the
overhead coming from predicting the variance. If `idx` is specified, returns
the sum of all the means up to the `idx`-th GP. Useful for inspecting the inner
state of the stack.
Args:
data: Input data to predict on.
Data is provided as ndarray with shape = (n_points, n_features).
idx: Integer of the GP in the stack. Counting starts from the bottom at
zero. If `None`, the mean prediction of the entire stack is returned.
Returns:
Predicted mean for every input. `shape = (n_points, 1)`
"""
all_gps = self.source_gps + [self.target_gp]
if idx is None: # if None, the target GP is considered
idx = len(all_gps) - 1
mu = np.zeros((X.shape[0], 1))
# returned mean is a sum of means of the predictions of all GPs below idx
for model in all_gps[: idx + 1]:
mu += model.predict_posterior_mean(X)
return mu
def predict_posterior_covariance(self, x1: np.ndarray, x2: np.ndarray) -> np.ndarray:
"""Posterior covariance between two inputs.
Args:
x1: First input to be queried. `shape = (n_points_1, n_features)`
x2: Second input to be queried. `shape = (n_points_2, n_features)`
Returns:
Posterior covariance at `(x1, x2)`. `shape = (n_points_1, n_points_2)`
"""
return self.target_gp.predict_posterior_covariance(x1, x2)
def get_fmin(self):
return np.min(self._y)
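The stacking idea behind MHGP, where each stage fits only the residual left by the stages below it and the stack's mean is the sum of all stage means, does not depend on GPs. A self-contained sketch using simple polynomial fits as hypothetical stand-ins for the per-task GPs:

```python
import numpy as np

class ResidualStack:
    """Each stage fits the residual left by the stages below;
    the stack's prediction is the sum of all stage predictions."""
    def __init__(self, degree=3):
        self.degree = degree
        self.stages = []  # list of polynomial coefficient vectors

    def _stack_mean(self, x):
        mu = np.zeros_like(x, dtype=float)
        for coeffs in self.stages:
            mu += np.polyval(coeffs, x)
        return mu

    def add_task(self, x, y):
        # analogue of _compute_residuals followed by fitting a new source GP
        residual = y - self._stack_mean(x)
        self.stages.append(np.polyfit(x, residual, self.degree))

    def predict(self, x):
        return self._stack_mean(x)

x = np.linspace(-1, 1, 50)
source_y = np.sin(3 * x)            # source task
target_y = np.sin(3 * x) + 0.3 * x  # related target task
stack = ResidualStack()
stack.add_task(x, source_y)         # meta_fit on the source
stack.add_task(x, target_y)         # fit on the target residuals
err = np.abs(stack.predict(x) - target_y).max()
```

Because the second stage only has to model the small source-to-target residual, the target fit needs far less capacity (or, in the GP case, data) than fitting the target from scratch.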
================================================
FILE: transopt/optimizer/model/mlp.py
================================================
import os
from typing import List, Tuple
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.datasets.utils as dataset_utils
from PIL import Image
from sklearn.model_selection import KFold, train_test_split
from torch.autograd import grad
from torch.utils.data import DataLoader, TensorDataset
from torchvision import datasets, transforms
from transopt.agent.registry import model_registry
from transopt.optimizer.model.model_base import Model
def compute_irm_penalty(losses, dummy):
g1 = grad(losses[0::2].mean(), dummy, create_graph=True)[0]
g2 = grad(losses[1::2].mean(), dummy, create_graph=True)[0]
return (g1 * g2).sum()
class Net(nn.Module):
def __init__(self, input_dim, dropout_rate=0.3):
super(Net, self).__init__()
self.fc1 = nn.Linear(input_dim, 64)
self.fc2 = nn.Linear(64, 32)
self.fc3 = nn.Linear(32, 16)
self.fc4 = nn.Linear(16, 1)
self.dropout = nn.Dropout(dropout_rate)
def forward(self, x):
x = F.relu(self.fc1(x))
x = self.dropout(x)
x = F.relu(self.fc2(x))
x = self.dropout(x)
x = F.relu(self.fc3(x))
logits = self.fc4(x)
return logits
@model_registry.register('MLP')
class MLP(Model):
def __init__(self, config):
super().__init__()
self._model = None
use_cuda = torch.cuda.is_available()
self.device = torch.device("cuda" if use_cuda else "cpu")
self._batch_size = 16
self._dropout_rate = 0.3
self._best_model_state = None
self._best_val_loss = float('inf')
def meta_fit(
self,
source_X : List[np.ndarray],
source_Y : List[np.ndarray],
**kwargs,
):
pass
def fit(
self,
X : np.ndarray,
Y : np.ndarray,
epochs : int = 50,
optimize: bool = False,
):
self._X = np.copy(X)
self._y = np.copy(Y)
self._Y = np.copy(Y)
_X = np.copy(self._X)
_y = np.copy(self._y)
X_tensor = torch.tensor(_X, dtype=torch.float32)
y_tensor = torch.tensor(_y, dtype=torch.float32).view(-1, 1)
X_train, X_val, y_train, y_val = train_test_split(X_tensor, y_tensor, test_size=0.1, random_state=42)
        # train_test_split already returns tensors; detach-clone instead of
        # re-wrapping them with torch.tensor (which warns on tensor input)
        X_train_tensor = X_train.clone().detach()
        y_train_tensor = y_train.clone().detach()
        X_val_tensor = X_val.clone().detach()
        y_val_tensor = y_val.clone().detach()
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
train_loader = DataLoader(train_dataset, batch_size=self._batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=self._batch_size, shuffle=False)
patience = 5
patience_counter = 0
train_losses = []
val_losses = []
for epoch in range(epochs):
if self._model is None or patience_counter >= patience:
self._model = Net(input_dim=X_train.shape[1], dropout_rate=self._dropout_rate).to(self.device)
self._optimizer = optim.Adam(self._model.parameters(), lr=0.0001, weight_decay=1e-5)
patience_counter = 0
self._model.train()
train_loss = 0
for data, target in train_loader:
data, target = data.to(self.device), target.to(self.device)
self._optimizer.zero_grad()
output = self._model(data)
loss = F.mse_loss(output, target)
loss.backward()
self._optimizer.step()
train_loss += loss.item()
train_loss /= len(train_loader)
            train_losses.append(train_loss)
self._model.eval()
val_loss = 0
with torch.no_grad():
for data, target in val_loader:
data, target = data.to(self.device), target.to(self.device)
output = self._model(data)
loss = F.mse_loss(output, target)
val_loss += loss.item()
val_loss /= len(val_loader)
            val_losses.append(val_loss)
print(f'Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')
if val_loss < self._best_val_loss:
self._best_val_loss = val_loss
self._best_model_state = self._model.state_dict()
patience_counter = 0
else:
patience_counter += 1
if self._best_model_state:
self._model.load_state_dict(self._best_model_state)
self.save_plots(train_losses, val_losses, X_val_tensor, y_val_tensor, 'output_plots', iter_num=_X.shape[0])
def predict(
self, X: np.ndarray, return_full: bool = False, with_noise: bool = False
) -> Tuple[np.ndarray, np.ndarray]:
data = torch.tensor(X, dtype=torch.float32).to(self.device)
self._model.eval()
with torch.no_grad():
output = self._model(data)
output = output.to('cpu')
output = output.numpy()
variance = np.zeros(shape=(output.shape[0], 1))
return output, variance
def get_fmin(self):
return np.min(self._y)
def save_plots(self, train_losses, val_losses, X_val, y_val, output_dir, iter_num):
if not os.path.exists(output_dir):
os.makedirs(output_dir)
        # Save the train/validation loss curves
plt.figure(figsize=(10, 5))
plt.plot(train_losses, label='Train Loss')
plt.plot(val_losses, label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.title('Train and Validation Loss Over Epochs')
plt.savefig(os.path.join(output_dir, f'loss_plot_{iter_num}.png'))
plt.close()
        # Save the predictions vs. true values plot
self._model.eval()
with torch.no_grad():
predictions = self._model(X_val.to(self.device)).cpu().numpy()
plt.figure(figsize=(10, 5))
plt.plot(range(len(y_val)), y_val.cpu().numpy(), label='True Values')
plt.plot(range(len(predictions)), predictions, label='Predictions')
plt.xlabel('Samples')
plt.ylabel('Values')
plt.legend()
plt.title('Predictions vs True Values')
plt.savefig(os.path.join(output_dir, f'predictions_vs_true_plot_{iter_num}.png'))
plt.close()
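The training loop in `fit` above tracks the best validation loss and reacts when it has not improved for `patience` consecutive epochs. That bookkeeping can be isolated from the torch code; a sketch (the re-initialization in the real loop happens at the start of the next epoch, here we only record which epoch triggered it):

```python
def early_stopping_trace(val_losses, patience=5):
    """Return (best_loss, best_epoch, epochs where the patience counter
    would trigger a reset), mimicking the counter logic in fit() above."""
    best, best_epoch, counter, resets = float("inf"), -1, 0, []
    for epoch, loss in enumerate(val_losses):
        if loss < best:
            best, best_epoch, counter = loss, epoch, 0  # improvement: reset counter
        else:
            counter += 1
            if counter >= patience:
                resets.append(epoch)  # model would be re-initialized here
                counter = 0
    return best, best_epoch, resets

best, best_epoch, resets = early_stopping_trace(
    [1.0, 0.8, 0.7, 0.9, 0.9, 0.9, 0.6, 0.7, 0.7], patience=3)
```

Restoring `self._best_model_state` at the end, as `fit` does, ensures the returned model corresponds to `best_epoch` rather than to the last epoch trained.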
================================================
FILE: transopt/optimizer/model/model_base.py
================================================
from abc import abstractmethod, ABC
from typing import Dict, Hashable
import numpy as np
class Model(ABC):
"""Abstract model class."""
def __init__(self):
"""Initializes base model."""
self._X = None
self._Y = None
@property
def X(self) -> np.ndarray:
"""Return input data."""
return self._X
@property
def y(self) -> np.ndarray:
"""Return target data."""
return self._Y
@abstractmethod
def meta_fit(self, metadata, **kwargs):
"""Train model on historical data.
Parameters:
-----------
metadata
Dictionary containing a numerical representation of the meta-data that can
be used to meta-train a model for each task.
"""
pass
@abstractmethod
def fit(self, X, Y, **kwargs):
"""Adjust model parameter to the observation on the new dataset.
Parameters:
-----------
data: TaskData
Observation data.
"""
pass
@abstractmethod
    def predict(self, X) -> tuple[np.ndarray, np.ndarray]:
"""Predict outcomes for a given array of input values.
Parameters:
-----------
data: InputData
Input data to predict on.
Returns
-------
mu: shape = (n_points, 1)
Predicted mean for every input
cov: shape = (n_points, n_points) or (n_points, 1)
Predicted (co-)variance for every input
"""
pass
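A concrete subclass only needs to supply `meta_fit`, `fit`, and `predict`. A hypothetical minimal implementation, a constant-mean baseline, illustrates the smallest surface the `Model` ABC requires (the trimmed base class below is a self-contained copy for the demo, not the real one):

```python
from abc import ABC, abstractmethod
from typing import Tuple
import numpy as np

class Model(ABC):
    """Trimmed copy of the abstract base above, for a self-contained demo."""
    @abstractmethod
    def meta_fit(self, metadata, **kwargs): ...
    @abstractmethod
    def fit(self, X, Y, **kwargs): ...
    @abstractmethod
    def predict(self, X) -> Tuple[np.ndarray, np.ndarray]: ...

class ConstantModel(Model):
    """Predicts the training mean with the training variance everywhere."""
    def meta_fit(self, metadata, **kwargs):
        pass  # no transfer: nothing learned from historical data

    def fit(self, X, Y, **kwargs):
        self._mu = float(np.mean(Y))
        self._var = float(np.var(Y))

    def predict(self, X):
        n = X.shape[0]
        return np.full((n, 1), self._mu), np.full((n, 1), self._var)

m = ConstantModel()
m.fit(np.zeros((4, 2)), np.array([[1.0], [2.0], [3.0], [4.0]]))
mu, var = m.predict(np.zeros((3, 2)))
```

Even a degenerate model like this respects the `(n_points, 1)` output contract, which is what lets acquisition functions treat all registered models interchangeably.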
================================================
FILE: transopt/optimizer/model/moeadego.py
================================================
import numpy as np
from GPy.kern import RBF, Kern
from sklearn.preprocessing import StandardScaler
from transopt.agent.registry import model_registry
from transopt.optimizer.model.gp import GP
from transopt.optimizer.model.model_base import Model
from transopt.utils.weights import init_weight, tchebycheff
@model_registry.register("MOEAD-EGO")
class MoeadEGO(Model):
def __init__(
self,
num_objective: int,
name="MoeadEGO",
num_weights=10,
seed=0,
normalize: bool = True,
**options: dict
):
super().__init__()
self.name = name
self.num_weights = num_weights
self.num_objective = num_objective
self.normalize = normalize
self.seed = seed
self.weights = init_weight(self.num_objective, self.num_weights)
self.models = []
self._x_normalizer = StandardScaler() if normalize else None
self._y_normalizer = StandardScaler() if normalize else None
        self._options = options
        # observation noise for the per-weight GPs built in _create_model
        self._noise_variance = options.get("noise_variance", 1.0)
def fit(self, X, Y):
self._X = np.copy(X)
self._Y = np.copy(Y)
if self.normalize:
X = self._x_normalizer.fit_transform(X)
Y = self._y_normalizer.fit_transform(Y)
self._update_model(X, Y)
def predict(self, X, full_cov=False):
return self._make_prediction(X, full_cov)
def _create_model(self, X, Y):
self.models = []
ideal_point = np.min(Y.T, axis=0)
for i, weight in enumerate(self.weights):
kernel = RBF(input_dim=X.shape[1])
Y_weighted = tchebycheff(Y.T, weight, ideal=ideal_point)
model = GP(
kernel, noise_variance=self._noise_variance
)
model.fit(
X = X,
Y = Y_weighted,
optimize = True,
)
            # constrain hyperparameters on the underlying GPy model
            model._gpy_model[".*Gaussian_noise.variance"].constrain_fixed(1.0e-4)
            model._gpy_model[".*rbf.variance"].constrain_fixed(1.0)
self.models.append(model)
def _update_model(self, X, Y):
if not self.models:
self._create_model(X, Y)
else:
ideal_point = np.min(Y.T, axis=0)
            for i, model in enumerate(self.models):
                Y_weighted = tchebycheff(Y.T, self.weights[i], ideal=ideal_point)
                model._gpy_model.set_XY(X, Y_weighted[:, np.newaxis])
        try:
            for model in self.models:
                model._gpy_model.optimize_restarts(
                    num_restarts=1, verbose=False, robust=True
                )
        except np.linalg.LinAlgError as e:
            print("Error during model optimization: ", e)
def _make_prediction(self, X, full_cov=False):
pred_mean = np.zeros((X.shape[0], 0))
pred_var = (
np.zeros((X.shape[0], 0))
if not full_cov
else np.zeros((0, X.shape[0], X.shape[0]))
)
        for model in self.models:
            mean, var = model.predict(X, return_full=full_cov)
pred_mean = np.append(pred_mean, mean, axis=1)
if full_cov:
pred_var = np.append(pred_var, [var], axis=0)
else:
pred_var = np.append(pred_var, var, axis=1)
return pred_mean, pred_var
def _make_prediction_by_id(self, X, idx, full_cov=False):
pred_mean = np.zeros((X.shape[0], 0))
if full_cov:
pred_var = np.zeros((0, X.shape[0], X.shape[0]))
else:
pred_var = np.zeros((X.shape[0], 0))
        mean, var = self.models[idx].predict(X, return_full=full_cov)
pred_mean = np.append(pred_mean, mean, axis=1)
if full_cov:
pred_var = np.append(pred_var, [var], axis=0)
else:
pred_var = np.append(pred_var, var, axis=1)
return pred_mean, pred_var
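`MoeadEGO` reduces the multi-objective vector to one scalar per weight via the Tchebycheff function imported from `transopt.utils.weights`. A common form of that scalarization, shown here as an assumption since the real helper's exact signature may differ, is the weighted max deviation from the ideal point:

```python
import numpy as np

def tchebycheff(F, weight, ideal):
    """Tchebycheff scalarization: for each point (row of F), the
    weighted maximum deviation from the ideal (componentwise-min) point."""
    return np.max(weight * np.abs(F - ideal), axis=1)

F = np.array([[1.0, 4.0],
              [2.0, 2.0],
              [4.0, 1.0]])   # three points, two objectives
ideal = F.min(axis=0)        # componentwise best observed values
w = np.array([0.5, 0.5])
scalar = tchebycheff(F, w, ideal)
```

Minimizing this scalar for many different weight vectors, one GP per weight as in `_create_model`, drives points toward different regions of the Pareto front, including non-convex parts that a plain weighted sum cannot reach.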
================================================
FILE: transopt/optimizer/model/mtgp.py
================================================
# Copyright (c) 2021 Robert Bosch GmbH
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see .
import copy
from typing import Dict, List, Tuple
import numpy as np
from sklearn.preprocessing import StandardScaler
from GPy.kern import Kern, RBF
from GPy.models import GPCoregionalizedRegression
from GPy.util.multioutput import ICM
from transopt.optimizer.model.gp import GP
from transopt.optimizer.model.utils import is_pd, nearest_pd
from transopt.agent.registry import model_registry
@model_registry.register("MTGP")
class MTGP(GP):
r"""Multi-Task-Single-k GP, a GP-based transfer-learning algorithm.
Multi-Task-Single-kGP models the source and target data on an equal footing with no
explicit hierarchy. Correlations within tasks are assumed to be different than
those across tasks. Also known as coregionalized regression model,
Multi-Task-Single-k GP models the data with a kernel of the form
$$
\begin{bmatrix}
k((x, s), (x', s)) & k((x, s), (x', t)) \\
k((x, t), (x', s)) & k((x, t), (x', t))
\end{bmatrix}
=
\begin{bmatrix}
W_{ss} & W_{st} \\
W_{st} & W_{tt}
\end{bmatrix}
k(x, x'),
$$
where $\mathbf{W}$ is a positive semi-definite matrix also known as
coregionalization matrix.
Multi-Task-Single-k GP is a powerful but computationally expensive Method since (i)
it scales cubically with the total number of data points and (ii) the number of
hyperparameters scales quadratically with the number of tasks.
"""
def __init__(
self,
kernel: Kern = None,
noise_variance: float = 1.0,
normalize: bool = False,
**options: dict,
):
super().__init__(kernel, noise_variance, normalize, **options)
        self._normalize = normalize
        self._x_normalizer = StandardScaler() if normalize else None
        self._y_normalizer = StandardScaler() if normalize else None
self._kernel = kernel
self._multikernel = None
self._gpy_model = None
self._noise_variance = []
self.n_sources = None
self.n_features = None
self._metadata_x = []
self._metadata_y = []
self._options = options
def meta_fit(
self,
source_X : List[np.ndarray],
source_Y : List[np.ndarray],
**kwargs,
):
data_X = copy.deepcopy(source_X)
data_Y = copy.deepcopy(source_Y)
self.n_sources = len(data_X)
        # collect input/observed values from all source data sets
        self._metadata_x.extend(data_X)
        self._metadata_y.extend(data_Y)
self.n_features = self._metadata_x[0].shape[-1]
def fit(
self,
X : np.ndarray,
Y : np.ndarray,
optimize: bool = False,
):
if not self._metadata_x:
raise ValueError(
"Error: source data not available. Forgot to call `meta_fit`."
)
self._X = np.copy(X)
self._y = np.copy(Y)
# add target data to the list of input/observed values
x_list = copy.deepcopy(self._metadata_x)
y_list = copy.deepcopy(self._metadata_y)
x_list.append(X)
y_list.append(Y)
if self._normalize:
# add source order to data lists
for i in range(len(x_list)):
x_list[i] = np.hstack(
[x_list[i], np.zeros((x_list[i].shape[0], 1)) + i]
)
y_list[i] = np.hstack(
[y_list[i], np.zeros((y_list[i].shape[0], 1)) + i]
)
# merge all data into one array, normalize data
x_all = np.vstack(x_list)
x_all[:, :-1] = self._x_normalizer.fit_transform(x_all[:, :-1])
y_all = np.vstack(y_list)
y_all[:, :-1] = self._y_normalizer.fit_transform(y_all[:, :-1])
# transform data back to original list of arrays
for i in range(len(x_list)):
x_list[i] = x_all[np.where(x_all[:, -1] == i)][:, :-1]
y_list[i] = y_all[np.where(y_all[:, -1] == i)][:, :-1]
# define multiple output kernel
if self._kernel is None:
self._kernel = RBF(self.n_features)
multikernel = ICM(
input_dim=self.n_features,
num_outputs=self.n_sources + 1,
kernel=self._kernel,
)
# fit model to current data
self._gpy_model = GPCoregionalizedRegression(x_list, y_list, kernel=multikernel)
if optimize:
optimize_restarts_options = self._options.get(
"optimize_restarts_options", {}
)
kwargs = copy.deepcopy(optimize_restarts_options)
if "verbose" not in optimize_restarts_options:
kwargs["verbose"] = False
self._gpy_model.optimize_restarts(**kwargs)
self._multikernel = self._gpy_model.kern.copy()
# noise variance: each element corresponds to the noise of one output
self._noise_variance = self._gpy_model.likelihood.param_array
def _raw_predict(
self, X: np.ndarray, return_full: bool = False, with_noise: bool = False
) -> Tuple[np.ndarray, np.ndarray]:
_X = X.copy()
if self._X is None:
mu = np.zeros((_X.shape[0], 1))
cov = self._kernel.K(_X)
var = np.diag(cov)[:, None]
return mu, cov if return_full else var
if self._normalize:
_X = self._x_normalizer.transform(_X)
# predictions are made for the last output, which corresponds to the target;
# prepare extended input format + associated noise model
_X_test = np.hstack([_X, np.ones((_X.shape[0], 1)) * self.n_sources])
noise_dict = {"output_index": _X_test[:, -1:].astype(int)}
# ensure that no negative variance is predicted
mu, cov = self._gpy_model.predict(
_X_test,
full_cov=return_full,
include_likelihood=with_noise,
Y_metadata=noise_dict,
)
if return_full:
if not is_pd(cov):
cov = nearest_pd(cov)
else:
cov = np.clip(cov, 1e-20, None)
return mu, cov
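As a reading aid, the coregionalized kernel from the MTGP docstring can be sketched in plain NumPy, independent of GPy; `rbf_kernel` and `icm_kernel` are illustrative names, not part of TransOPT:

```python
import numpy as np

def rbf_kernel(X, X2, lengthscale=1.0):
    """Squared-exponential base kernel k(x, x')."""
    d2 = ((X[:, None, :] - X2[None, :, :]) ** 2).sum(-1)
    return np.exp(-0.5 * d2 / lengthscale ** 2)

def icm_kernel(X, tasks, W):
    """Intrinsic coregionalization: K[(x,s),(x',t)] = W[s,t] * k(x, x')."""
    K_x = rbf_kernel(X, X)
    return W[np.ix_(tasks, tasks)] * K_x

# two tasks; W = B B^T + diag(kappa) guarantees positive semi-definiteness
B = np.array([[1.0], [0.8]])
W = B @ B.T + np.diag([0.1, 0.1])
X = np.random.default_rng(0).normal(size=(6, 2))
tasks = np.array([0, 0, 0, 1, 1, 1])  # task index of each data row
K = icm_kernel(X, tasks, W)
```

Because `W` and the RBF Gram matrix are both positive semi-definite, their element-wise product is positive semi-definite by the Schur product theorem.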
================================================
FILE: transopt/optimizer/model/neuralprocess.py
================================================
import copy
import numpy as np
from typing import Dict, Hashable, Union, Sequence, Tuple, List
from transopt.optimizer.model.model_base import Model
from transopt.agent.registry import model_registry
@model_registry.register("NeuralProcess")
class NeuralProcess(Model):
def __init__(self):
super().__init__()
================================================
FILE: transopt/optimizer/model/parego.py
================================================
import GPy
import numpy as np
from sklearn.preprocessing import StandardScaler
from transopt.agent.registry import model_registry
from transopt.optimizer.model.model_base import Model
@model_registry.register("ParEGO")
class ParEGO(Model):
def __init__(self, seed=0, normalize=True, **options):
super().__init__()
self.seed = seed
self.normalize = normalize
self.model = None
self._x_normalizer = StandardScaler() if normalize else None
self._y_normalizer = StandardScaler() if normalize else None
self._options = options
self.rho = 0.1
def fit(self, X, Y):
self._X = np.copy(X)
self._Y = np.copy(Y)
if self.normalize:
X = self._x_normalizer.fit_transform(X)
Y = self._y_normalizer.fit_transform(Y)
self._update_model(X, Y)
def predict(self, X, full_cov=False):
return self._make_prediction(X, full_cov)
def _scalarization(self, Y: np.ndarray, rho):
theta = np.random.random_sample(Y.shape[0])
sum_theta = np.sum(theta)
theta = theta / sum_theta
theta_f = Y.T * theta
max_k = np.max(theta_f, axis=1)
rho_sum_theta_f = rho * np.sum(theta_f, axis=1)
return max_k + rho_sum_theta_f
def _create_model(self, X, Y):
kernel = GPy.kern.RBF(input_dim=X.shape[1])
model = GPy.models.GPRegression(X, Y, kernel=kernel, normalizer=None)
model[".*Gaussian_noise.variance"].constrain_fixed(1.0e-4)
model[".*rbf.variance"].constrain_fixed(1.0)
self.model = model
def _update_model(self, X, Y):
Y_scalar = self._scalarization(Y, self.rho)[:, np.newaxis]
if self.model is None:
self._create_model(X, Y_scalar)
else:
self.model.set_XY(X, Y_scalar)
try:
self.model.optimize_restarts(num_restarts=1, verbose=self._options.get("verbose", False), robust=True)
except np.linalg.LinAlgError as e:
print("Error during model optimization: ", e)
def _make_prediction(self, X, full_cov=False):
pred_mean = np.zeros((X.shape[0], 0))
pred_var = np.zeros((X.shape[0], 0)) if not full_cov else np.zeros((0, X.shape[0], X.shape[0]))
if self.model:
mean, var = self.model.predict(X, full_cov=full_cov)
pred_mean = np.append(pred_mean, mean, axis=1)
if full_cov:
pred_var = np.append(pred_var, [var], axis=0)
else:
pred_var = np.append(pred_var, var, axis=1)
return pred_mean, pred_var
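ParEGO's scalarization is the augmented Tchebycheff function f(x) = max_i λ_i f_i(x) + ρ Σ_i λ_i f_i(x), with a random weight vector λ drawn over the objectives. A minimal NumPy sketch (`parego_scalarize` is a hypothetical helper; it assumes `Y` has shape `(n_points, n_objectives)`):

```python
import numpy as np

def parego_scalarize(Y, rho=0.05, rng=None):
    """Augmented Tchebycheff scalarization underlying ParEGO.

    Draws one random weight vector over the objectives and collapses the
    multi-objective values Y (n_points, n_objectives) into one cost per point.
    """
    rng = np.random.default_rng(rng)
    lam = rng.random(Y.shape[1])
    lam /= lam.sum()                  # weights over objectives, summing to one
    weighted = Y * lam                # (n_points, n_objectives)
    return weighted.max(axis=1) + rho * weighted.sum(axis=1)

Y = np.array([[1.0, 4.0], [3.0, 2.0], [2.0, 2.0]])
scalar = parego_scalarize(Y, rho=0.05, rng=0)
```

A Pareto-dominated point can never score better than its dominator, since every weighted objective (and hence both the max and the sum) is no smaller.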
================================================
FILE: transopt/optimizer/model/pr.py
================================================
import numpy as np
from typing import Tuple, Dict, List
from sklearn.preprocessing import StandardScaler
from transopt.optimizer.model.model_base import Model
from transopt.optimizer.model.utils import is_pd, nearest_pd
from transopt.agent.registry import model_registry
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
@model_registry.register('PR')
class PR(Model):
def __init__(
self,
degree: int = 10,
normalize: bool = True,
**options: dict
):
super().__init__()
self._degree = degree
self._pr_model = None
self._normalize = normalize
self._x_normalizer = StandardScaler() if normalize else None
self._y_normalizer = StandardScaler() if normalize else None
self._options = options
def meta_fit(
self,
source_X : List[np.ndarray],
source_Y : List[np.ndarray],
**kwargs,
):
pass
def fit(
self,
X: np.ndarray,
Y: np.ndarray,
optimize: bool = True,
):
self._X = np.copy(X)
self._y = np.copy(Y)
self._Y = np.copy(Y)
_X = np.copy(self._X)
_y = np.copy(self._y)
if self._normalize:
_X = self._x_normalizer.fit_transform(_X)
_y = self._y_normalizer.fit_transform(_y)
if self._pr_model is None:
self._poly_features = PolynomialFeatures(degree=self._degree)
X_poly = self._poly_features.fit_transform(_X)
self._pr_model = LinearRegression()
self._pr_model.fit(X_poly, _y)
else:
X_poly = self._poly_features.transform(_X)
self._pr_model.fit(X_poly, _y)
def predict(
self,
X: np.ndarray,
) -> Tuple[np.ndarray, np.ndarray]:
if X.ndim == 1:
X = X.reshape(1, -1)
X_poly = self._poly_features.transform(X)
Y = self._pr_model.predict(X_poly)
return Y, None
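The PR model above is ordinary least squares on a polynomial feature expansion. The same idea in the one-dimensional case, using only NumPy (`poly_fit_predict` is an illustrative name, not part of TransOPT):

```python
import numpy as np

def poly_fit_predict(X_train, y_train, X_test, degree=3):
    """Fit a 1-D polynomial surrogate by least squares and predict.

    Mirrors the PolynomialFeatures + LinearRegression pipeline of the PR
    model for one input dimension.
    """
    Phi = np.vander(X_train.ravel(), degree + 1)      # polynomial design matrix
    coef, *_ = np.linalg.lstsq(Phi, y_train.ravel(), rcond=None)
    Phi_test = np.vander(X_test.ravel(), degree + 1)
    return Phi_test @ coef

rng = np.random.default_rng(0)
X = rng.uniform(-1, 1, size=(20, 1))
y = 2.0 * X.ravel() ** 2 + 1.0                        # noiseless quadratic
pred = poly_fit_predict(X, y, X, degree=2)            # degree matches the data
```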
================================================
FILE: transopt/optimizer/model/rbfn.py
================================================
from typing import List, Tuple
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from torch.autograd import Variable
from torch.utils.data import DataLoader, Dataset
from transopt.agent.registry import model_registry
from transopt.optimizer.model.model_base import Model
class RegressionDataset(Dataset):
"""create a dataset that complies with PyTorch """
def __init__(self, inputs, targets):
self.inputs = inputs
self.targets = targets
def __len__(self):
return len(self.inputs)
def __getitem__(self, index):
x = self.inputs[index]
y = self.targets[index]
return x, y
class RbfNet(nn.Module):
def __init__(self, centers, beta):
super(RbfNet, self).__init__()
self.num_centers = centers.size(0)
self.centers = nn.Parameter(centers)
self.beta = nn.Parameter(beta)
self.linear = nn.Linear(self.num_centers, 1)
nn.init.xavier_uniform_(self.linear.weight)
def kernel_fun(self, batches):
n_input = batches.size(0)
A = self.centers.view(self.num_centers, -1).repeat(n_input, 1, 1)
B = batches.view(n_input, -1).unsqueeze(1).repeat(1, self.num_centers, 1)
C = torch.exp(-self.beta.mul((A - B).pow(2).sum(2, keepdim=False).sqrt()))
return C
def forward(self, x):
x = self.kernel_fun(x)
x = self.linear(x)
return x
class rbfn(object):
def __init__(self, dataset, max_epoch=30, batch_size=5, lr=0.01, num_centers=5, show_details=False):
self.max_epoch = max_epoch
self.batch_size = batch_size
self.lr = lr
self.num_centers = num_centers
self.dim = dataset.inputs.shape[1]
# create the DataLoader for training
self.dataset = dataset
self.data_loader = DataLoader(dataset=dataset,
batch_size=self.batch_size,
shuffle=True,
num_workers=1)
# cluster
self.centers = self.cluster()
self.beta = self.calculate_beta()
# create Rbf network
self.model = RbfNet(self.centers, self.beta)
self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
self.loss_fun = nn.MSELoss()
self.avg_loss = 0
self.show_details = show_details
def train(self):
self.model.train()
for epoch in range(self.max_epoch):
self.avg_loss = 0
total_batch = len(self.dataset) // self.batch_size
for i, (input, output) in enumerate(self.data_loader):
X = Variable(input.view(-1, self.dim))
Y = Variable(output)
self.optimizer.zero_grad()
Y_prediction = self.model(X)
loss = self.loss_fun(Y_prediction, Y)
loss.backward()
self.optimizer.step()
self.avg_loss += loss / total_batch
if self.show_details:
print("[Epoch: {:>4}] loss = {:>.9}".format(epoch + 1, self.avg_loss))
print("[*] Training finished! Loss: {:.9f}".format(self.avg_loss))
def predict(self, x):
self.model.eval()
x = torch.from_numpy(x)
x = Variable(x)
y = self.model(x)
return y.data.numpy()
def cluster(self):
kmeans = KMeans(n_clusters=self.num_centers)
kmeans.fit(self.dataset.inputs)
centers = kmeans.cluster_centers_
return torch.from_numpy(centers)
def calculate_beta(self):
r2 = torch.ones(1, self.num_centers)
for i, center in enumerate(self.centers):
distances = torch.linalg.norm(self.centers - center, dim=1)
nearest_two_neighbors_indices = torch.argsort(distances)[:2]
r2[0][i] = torch.sum(distances[nearest_two_neighbors_indices]**2) / 2
beta = 1 / r2
return beta
def update_dataset(self, dataset):
self.dataset = dataset
self.data_loader = DataLoader(dataset=dataset,
batch_size=self.batch_size,
shuffle=True,
num_workers=1)
@model_registry.register('RBFN')
class RBFN(Model):
def __init__(
self,
max_epoch: int = 30,
batch_size: int = 1,
lr: float = 0.01,
num_centers: int = 10,
show_details: bool = False,
normalize: bool = True,
**options: dict
):
super().__init__()
self._max_epoch = max_epoch
self._batch_size = batch_size
self._lr = lr
self._num_centers = num_centers
self._rbfn_model = None
self._show_details = show_details
self._normalize = normalize
self._x_normalizer = StandardScaler() if normalize else None
self._y_normalizer = StandardScaler() if normalize else None
self._options = options
def meta_fit(
self,
source_X : List[np.ndarray],
source_Y : List[np.ndarray],
**kwargs,
):
pass
def fit(
self,
X: np.ndarray,
Y: np.ndarray,
optimize: bool = True,
):
self._X = np.copy(X)
self._y = np.copy(Y)
self._Y = np.copy(Y)
_X = np.copy(self._X)
_y = np.copy(self._y)
if self._normalize:
_X = self._x_normalizer.fit_transform(_X)
_y = self._y_normalizer.fit_transform(_y)
if self._rbfn_model is None:
dataset = RegressionDataset(torch.from_numpy(_X), torch.from_numpy(_y))
self._rbfn_model = rbfn(
dataset=dataset,
max_epoch=self._max_epoch,
batch_size=self._batch_size,
lr=self._lr,
num_centers=self._num_centers,
show_details=self._show_details,
)
else:
dataset = RegressionDataset(torch.from_numpy(_X), torch.from_numpy(_y))
self._rbfn_model.update_dataset(dataset)
try:
self._rbfn_model.train()
except np.linalg.LinAlgError as e:
print('Error during RBFN training:', e)
def predict(
self,
X: np.ndarray,
) -> Tuple[np.ndarray, np.ndarray]:
if X.ndim == 1:
X = X[None, :]
Y = self._rbfn_model.predict(X)
return Y, None
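The forward pass of `RbfNet` computes Gaussian basis responses exp(-β_j ‖x − c_j‖) followed by a linear readout. A NumPy sketch of the same computation (`rbf_forward` is a hypothetical helper):

```python
import numpy as np

def rbf_forward(X, centers, beta, w, b):
    """Forward pass of a Gaussian RBF network.

    phi_j(x) = exp(-beta_j * ||x - c_j||), then a linear readout, matching
    RbfNet.kernel_fun followed by RbfNet.linear above.
    """
    dists = np.linalg.norm(X[:, None, :] - centers[None, :, :], axis=2)
    phi = np.exp(-beta * dists)          # (n_points, n_centers)
    return phi @ w + b

centers = np.array([[0.0, 0.0], [1.0, 1.0]])
beta = np.array([1.0, 1.0])
w, b = np.array([0.5, -0.5]), 0.1
X = np.array([[0.0, 0.0], [1.0, 1.0]])   # query the two centers themselves
out = rbf_forward(X, centers, beta, w, b)
```

By symmetry of this toy setup, the two outputs are mirror images around the bias, so `out[0] + out[1] == 2 * b`.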
================================================
FILE: transopt/optimizer/model/rf.py
================================================
# Copyright (c) 2021 Robert Bosch GmbH
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import copy
import numpy as np
from typing import Tuple, Dict, List
from sklearn.preprocessing import StandardScaler
from transopt.optimizer.model.model_base import Model
from transopt.agent.registry import model_registry
from sklearn.ensemble import RandomForestRegressor
@model_registry.register('RF')
class RF(Model):
def __init__(
self,
name = 'RandomForest',
num_estimators = 100,
seed = 0,
normalize: bool = True,
**options: dict
):
"""Initialize the Method.
"""
super().__init__()
self.name = name
self.num_estimators = num_estimators
self.model = RandomForestRegressor(
n_estimators=100,
max_features='sqrt',
bootstrap=True,
random_state=seed
)
self._normalize = normalize
self._x_normalizer = StandardScaler() if normalize else None
self._y_normalizer = StandardScaler() if normalize else None
self._options = options
def meta_fit(
self,
source_X : List[np.ndarray],
source_Y : List[np.ndarray],
**kwargs,
):
pass
def fit(
self,
X: np.ndarray,
Y: np.ndarray,
optimize: bool = True,
):
self._X = np.copy(X)
self._y = np.copy(Y)
self._Y = np.copy(Y)
_X = np.copy(self._X)
_y = np.copy(self._y)
if self._normalize:
_X = self._x_normalizer.fit_transform(_X)
_y = self._y_normalizer.fit_transform(_y)
self.model.fit(_X, _y)
def predict(
self, X, return_full: bool = False, with_noise: bool = False
) -> Tuple[np.ndarray, np.ndarray]:
mean, var = self._raw_predict(X, return_full, with_noise)
return mean, var
def _raw_predict(
self, X, return_full: bool = False, with_noise: bool = False
) -> Tuple[np.ndarray, np.ndarray]:
"""Predict functions distribution(s) for given test point(s) without taking into
account data normalization. If `self._normalize` is `False`, return the same as
`self.predict()`.
Same input/output as `self.predict()`.
"""
_X_test = X.copy()
mu = self.model.predict(_X_test)
# iterate over the fitted trees (`estimators_`), not the forest object itself
std = self._raw_predict_var(_X_test, self.model.estimators_, mu)
return mu[:, np.newaxis], std[:, np.newaxis]
def _raw_predict_var(self, X, trees, predictions, min_variance=0.0):
# This derives std(y | x) as described in 4.3.2 of arXiv:1211.0906
std = np.zeros(len(X))
for tree in trees:
var_tree = tree.tree_.impurity[tree.apply(X)]
var_tree[var_tree < min_variance] = min_variance
mean_tree = tree.predict(X)
std += var_tree + mean_tree ** 2
std /= len(trees)
std -= predictions ** 2.0
std[std < 0.0] = 0.0
std = std ** 0.5
return std
def sample(
self, X, size: int = 1, with_noise: bool = False
) -> np.ndarray:
"""Perform model inference.
Sample functions from the posterior distribution for the given test points.
Args:
data: Input data to predict on. `shape = (n_points, n_features)`
size: Number of functions to sample.
with_noise: If `False`, the latent function `f` is considered. If `True`,
the observed function `y` that includes the noise variance is
considered.
Returns:
Sampled function value for every input. `shape = (n_points, size)`
"""
mean, cov = self.predict(X, return_full=True, with_noise=with_noise)
mean = mean.flatten()
sample = np.random.multivariate_normal(mean, cov, size).T
return sample
def get_fmin(self):
if self._normalize:
return np.min(self._y_normalizer.inverse_transform(self._y))
else:
return np.min(self._y)
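The per-tree variance combination in the RF model follows the law of total variance (Sec. 4.3.2 of arXiv:1211.0906): Var[y|x] = E_t[σ_t² + μ_t²] − (E_t[μ_t])². A small NumPy sketch (`forest_mean_std` is an illustrative name):

```python
import numpy as np

def forest_mean_std(tree_means, tree_vars):
    """Combine per-tree predictions via the law of total variance.

    tree_means, tree_vars: (n_trees, n_points) arrays of each tree's leaf
    mean and leaf impurity (variance) at the query points, as in the RF
    model's per-tree variance computation above.
    """
    mean = tree_means.mean(axis=0)
    second_moment = (tree_vars + tree_means ** 2).mean(axis=0)
    var = np.clip(second_moment - mean ** 2, 0.0, None)  # guard rounding error
    return mean, np.sqrt(var)

tree_means = np.array([[1.0, 2.0], [3.0, 2.0]])  # two trees, two query points
tree_vars = np.zeros((2, 2))                     # pure leaves: zero impurity
mean, std = forest_mean_std(tree_means, tree_vars)
```

With zero leaf impurity, the predictive spread comes entirely from disagreement between the trees.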
================================================
FILE: transopt/optimizer/model/rgpe.py
================================================
# RGPE: ranking-weighted Gaussian process ensemble (Feurer et al., arXiv:1802.02219)
import copy
from typing import Dict, List, Union, Sequence
import GPy
import numpy as np
from GPy.kern import RBF, Kern
from transopt.agent.registry import model_registry
from transopt.optimizer.model.gp import GP
from transopt.optimizer.model.model_base import Model
def roll_col(X: np.ndarray, shift: int) -> np.ndarray:
"""
Rotate columns to right by shift.
"""
return np.concatenate((X[:, -shift:], X[:, :-shift]), axis=1)
def compute_ranking_loss(
f_samps: np.ndarray,
target_y: np.ndarray,
target_model: bool,
) -> np.ndarray:
"""
Compute ranking loss for each sample from the posterior over target points.
"""
y_stack = np.tile(target_y.reshape((-1, 1)), f_samps.shape[0]).transpose()
rank_loss = np.zeros(f_samps.shape[0])
if not target_model:
for i in range(1, target_y.shape[0]):
rank_loss += np.sum(
(roll_col(f_samps, i) < f_samps) ^ (roll_col(y_stack, i) < y_stack),
axis=1
)
else:
for i in range(1, target_y.shape[0]):
rank_loss += np.sum(
(roll_col(f_samps, i) < y_stack) ^ (roll_col(y_stack, i) < y_stack),
axis=1
)
return rank_loss
@model_registry.register('RGPE')
class RGPE(Model):
def __init__(
self,
kernel: Kern = None,
noise_variance: float = 1.0,
normalize: bool = True,
Seed = 0,
sampling_mode: str = 'bootstrap',
weight_dilution_strategy = 'probabilistic',
**options: dict,
):
super().__init__()
# GP on difference between target data and last source data set
self._noise_variance = noise_variance
self._metadata = {}
self._source_gps = {}
self._source_gp_weights = {}
self.sampling_mode = sampling_mode
self._normalize = normalize
self.Seed = Seed
self.rng = np.random.RandomState(self.Seed)
self.weight_dilution_strategy = weight_dilution_strategy
self.target_model = None
self._target_model_weight = 1
def _meta_fit_single_gp(
self,
X : np.ndarray,
Y : np.ndarray,
optimize: bool,
) -> GP:
"""Train a new source GP on `data`.
Args:
data: The source dataset.
optimize: Switch to run hyperparameter optimization.
Returns:
The newly trained GP.
"""
self.n_features = X.shape[1]
kernel = RBF(self.n_features, ARD=True)
new_gp = GP(
kernel, noise_variance=self._noise_variance
)
new_gp.fit(
X = X,
Y = Y,
optimize = optimize,
)
return new_gp
def meta_fit(self,
source_X : List[np.ndarray],
source_Y : List[np.ndarray],
optimize: Union[bool, Sequence[bool]] = True):
# metadata, _ = SourceSelection.the_k_nearest(source_datasets)
self._metadata = {'X': source_X, 'Y':source_Y}
self._source_gps = {}
assert isinstance(optimize, bool) or isinstance(optimize, list)
if isinstance(optimize, list):
assert len(source_X) == len(optimize)
optimize_flag = copy.copy(optimize)
if isinstance(optimize_flag, bool):
optimize_flag = [optimize_flag] * len(source_X)
for i in range(len(source_X)):
new_gp = self._meta_fit_single_gp(
source_X[i],
source_Y[i],
optimize=optimize_flag[i],
)
self._source_gps[i] = new_gp
self._calculate_weights()
def fit(self,
X: np.ndarray,
Y: np.ndarray,
optimize: bool = False):
self._X = copy.deepcopy(X)
self._Y = copy.deepcopy(Y)
self.n_samples, n_features = self._X.shape
if self.n_features != n_features:
raise ValueError("Number of features in model and input data mismatch.")
kern = GPy.kern.RBF(self.n_features, ARD=False)
self.target_model = GPy.models.GPRegression(self._X, self._Y, kernel=kern)
self.target_model['Gaussian_noise.*variance'].constrain_bounded(1e-9, 1e-3)
try:
self.target_model.optimize_restarts(num_restarts=1, verbose=False, robust=True)
except np.linalg.LinAlgError as e:
print('Error during model optimization:', e)
self._calculate_weights()
def predict(
self, X, return_full: bool = False, with_noise: bool = False
):
X_test = X
n_models = len(self._source_gp_weights)
if self._target_model_weight > 0:
n_models += 1
n_sample = X_test.shape[0]
means = np.empty((n_models, n_sample, 1))
weights = np.empty((n_models, 1))
if not return_full:
vars_ = np.empty((n_models, n_sample, 1))
else:
vars_ = np.empty((n_models, n_sample, n_sample))
for task_uid, weight in enumerate(self._source_gp_weights):
means[task_uid], vars_[task_uid] = self._source_gps[task_uid].predict(X_test)
weights[task_uid] = weight
if self._target_model_weight > 0:
means[-1], vars_[-1] = self.target_model.predict(X_test)
weights[-1] = self._target_model_weight
weights = weights[:,:,np.newaxis]
mean = np.sum(weights * means, axis=0)
var = np.sum(weights ** 2 * vars_, axis=0)
return mean, var
def _calculate_weights(self, alpha: float = 0.0):
if len(self._source_gps) == 0:
self._target_model_weight = 1
return
if self._X is None:
weight = 1 / len(self._source_gps)
self._source_gp_weights = [weight for task_uid in self._source_gps]
self._target_model_weight = 0
return
kernel = RBF(self.n_features, ARD=True)
if self.sampling_mode == 'bootstrap':
predictions = []
for model_idx in range(len(self._source_gps)):
model = self._source_gps[model_idx]
predictions.append(model.predict(self._X)[0].flatten()) # ndarray(n,)
masks = np.eye(len(self._X), dtype=bool)
train_x_cv = np.stack([self._X[~m] for m in masks])
train_y_cv = np.stack([self._Y[~m] for m in masks])
test_x_cv = np.stack([self._X[m] for m in masks])
model = GP(copy.deepcopy(kernel), noise_variance=self._noise_variance)
loo_prediction = []
for i in range(self._Y.shape[0]):
model.fit(train_x_cv[i], train_y_cv[i], optimize=False)
loo_prediction.append(model.predict(test_x_cv[i])[0][0][0])
predictions.append(loo_prediction)
predictions = np.array(predictions)
bootstrap_indices = self.rng.choice(predictions.shape[1],
size=(self.n_samples, predictions.shape[1]),
replace=True)
bootstrap_predictions = []
bootstrap_targets = self._Y[bootstrap_indices].reshape((self.n_samples, len(self._Y)))
for m in range(len(self._source_gps) + 1):
bootstrap_predictions.append(predictions[m, bootstrap_indices])
ranking_losses = np.zeros((len(self._source_gps) + 1, self.n_samples))
for i in range(len(self._source_gps)):
for j in range(len(self._Y)):
ranking_losses[i] += np.sum(
(
roll_col(bootstrap_predictions[i], j) < bootstrap_predictions[i])
^ (roll_col(bootstrap_targets, j) < bootstrap_targets
), axis=1
)
for j in range(len(self._Y)):
ranking_losses[-1] += np.sum(
(
(roll_col(bootstrap_predictions[-1], j) < bootstrap_targets)
^ (roll_col(bootstrap_targets, j) < bootstrap_targets)
), axis=1
)
# elif self.sampling_mode in ['simplified', 'correct']:
# # Use the original strategy as described in v1: https://arxiv.org/pdf/1802.02219v1.pdf
# ranking_losses = []
# # compute ranking loss for each base model
# for model_idx in range(len(self.source_gps)):
# model = self.source_gps[model_idx]
# # compute posterior over training points for target task
# f_samps = sample_sobol(model, self._X, self.n_samples, self.rng.randint(10000))
# # compute and save ranking loss
# ranking_losses.append(compute_ranking_loss(f_samps, self._Y, target_model=False))
#
# # compute ranking loss for target model using LOOCV
# if self.sampling_mode == 'simplified':
# # Independent draw of the leave one out sample, other "samples" are noise-free and the
# # actual observation
# f_samps = get_target_model_loocv_sample_preds(self._X, self._Y, self.n_samples, target_model,
# self.rng.randint(10000))
# ranking_losses.append(compute_ranking_loss(f_samps, self._Y, target_model=True))
# elif self.sampling_mode == 'correct':
# # Joint draw of the leave one out sample and the other observations
# ranking_losses.append(
# compute_target_model_ranking_loss(train_x, train_y, num_samples, target_model,
# rng.randint(10000))
# )
# else:
# raise ValueError(self.sampling_mode)
else:
raise NotImplementedError(self.sampling_mode)
if isinstance(self.weight_dilution_strategy, int):
weight_dilution_percentile_target = self.weight_dilution_strategy
weight_dilution_percentile_base = 50
elif self.weight_dilution_strategy is None or self.weight_dilution_strategy in ['probabilistic', 'probabilistic-ld']:
pass
else:
raise ValueError(self.weight_dilution_strategy)
ranking_loss = np.array(ranking_losses)
# perform model pruning
p_drop = []
if self.weight_dilution_strategy in ['probabilistic', 'probabilistic-ld']:
for i in range(len(self._source_gps)):
better_than_target = np.sum(ranking_loss[i, :] < ranking_loss[-1, :])
worse_than_target = np.sum(ranking_loss[i, :] >= ranking_loss[-1, :])
correction_term = alpha * (better_than_target + worse_than_target)
proba_keep = better_than_target / (better_than_target + worse_than_target + correction_term)
if self.weight_dilution_strategy == 'probabilistic-ld':
proba_keep = proba_keep * (1 - len(self._X) / float(self.number_of_function_evaluations))
proba_drop = 1 - proba_keep
p_drop.append(proba_drop)
r = self.rng.rand()
if r < proba_drop:
ranking_loss[i, :] = np.max(ranking_loss) * 2 + 1
elif self.weight_dilution_strategy is not None:
# Use the original strategy as described in v1: https://arxiv.org/pdf/1802.02219v1.pdf
percentile_base = np.percentile(ranking_loss[: -1, :], weight_dilution_percentile_base, axis=1)
percentile_target = np.percentile(ranking_loss[-1, :], weight_dilution_percentile_target)
for i in range(len(self._source_gps)):
if percentile_base[i] >= percentile_target:
ranking_loss[i, :] = np.max(ranking_loss) * 2 + 1
# compute best model (minimum ranking loss) for each sample
# this differs from v1, where the weight is given only to the target model in case of a tie.
# Here, we distribute the weight fairly among all participants of the tie.
minima = np.min(ranking_loss, axis=0)
assert len(minima) == self.n_samples
best_models = np.zeros(len(self._source_gps) + 1)
for i, minimum in enumerate(minima):
minimum_locations = ranking_loss[:, i] == minimum
sample_from = np.where(minimum_locations)[0]
for sample in sample_from:
best_models[sample] += 1. / len(sample_from)
# compute proportion of samples for which each model is best
rank_weights = best_models / self.n_samples
self._source_gp_weights = [rank_weights[task_uid] for task_uid in self._source_gps]
self._target_model_weight = rank_weights[-1]
return rank_weights, p_drop
def _calculate_weights_with_no_observations(self):
"""Calculate weights according to the given start Method when no target
task observations exist.
"""
first, _, _ = self._start.partition("-")
if first == "random":
# do nothing, predict should not yet be used
return
if first == "mean":
# assign equal weights to all base models
weight = 1 / len(self._source_gps)
self._source_gp_weights = {
task_uid: weight for task_uid in self._source_gps
}
self._target_model_weight = 0
return
raise RuntimeError(f"Predict called without observations, first = {first}")
def _calculate_weights_with_one_observation(self):
"""Calculate weights according to the given start Method when only one
unique target task observation is available.
"""
_, _, second = self._start.partition("-")
if second == "random":
# do nothing, predict should not be used yet
return
if second == "mean":
# assign equal weights to all base models and the target model
weight = 1 / (len(self._source_gps) + 1)
self._source_gp_weights = {
task_uid: weight for task_uid in self._source_gps
}
self._target_model_weight = weight
return
if second == "weighted":
# get unique observed point
X, indices = np.unique(self._X, axis=0, return_index=True)
# draw _n_samples for each unique observed point from each
# base model
all_samples = np.empty((len(self._source_gps), self._n_samples))
for i, task_uid in enumerate(self._source_gps):
model = self._source_gps[task_uid]
samples = model.sample(
X, size=self._n_samples, with_noise=True
)
all_samples[i] = samples
# compare drawn samples to observed values
y = self._y[indices]
diff = np.abs(all_samples - y)
# get base model with lowest absolute difference for each sample
best = np.argmin(diff, axis=0)
# compute weight as proportion of samples where a base model is best
occurrences = np.bincount(best, minlength=len(self._source_gps))
weights = occurrences / self._n_samples
self._source_gp_weights = dict(zip(self._source_gps, weights))
self._target_model_weight = 0
return
raise RuntimeError(
f"Weight calculation with one observation, second = {second}"
)
def _update_meta_data(self, *gps: GPy.models.GPRegression):
"""Cache the meta data after meta training."""
n_models = len(self._source_gps)
for task_uid, gp in enumerate(gps):
self._source_gps[n_models + task_uid] = gp
def meta_update(self):
self._update_meta_data(self.target_model)
def set_XY(self, Data:Dict):
self._X = copy.deepcopy(Data['X'])
self._Y = copy.deepcopy(Data['Y'])
def print_Weights(self):
print(f'Source weights:{self._source_gp_weights}')
print(f'Target weights:{self._target_model_weight}')
def get_Weights(self):
weights = self._source_gp_weights.copy()
weights.append(self._target_model_weight)
return weights
def loss(self, task_uid: int) -> np.ndarray:
model = self._source_gps[task_uid]
X = self._X
y = self._Y
samples = model.sample(X, size=self.n_samples, with_noise=True)
sample_comps = samples[:, np.newaxis, :] < samples
target_comps = np.tile(y[:, np.newaxis, :] < y, self.n_samples)
return np.sum(sample_comps ^ target_comps, axis=(1, 0))
def posterior_samples_f(self,X, size=10, **predict_kwargs):
"""
Samples the posterior GP at the points X.
:param X: The points at which to take the samples.
:type X: np.ndarray (Nnew x self.input_dim)
:param size: the number of a posteriori samples.
:type size: int.
:returns: set of simulations
:rtype: np.ndarray (Nnew x D x samples)
"""
predict_kwargs["full_cov"] = True # Always use the full covariance for posterior samples.
m, v = self._raw_predict(X, **predict_kwargs)
def sim_one_dim(m, v):
return np.random.multivariate_normal(m, v, size).T
return sim_one_dim(m.flatten(), v)[:, np.newaxis, :]
def posterior_samples(self, X, size=10, Y_metadata=None, likelihood=None, **predict_kwargs):
"""
Samples the posterior GP at the points X.
:param X: the points at which to take the samples.
:type X: np.ndarray (Nnew x self.input_dim.)
:param size: the number of a posteriori samples.
:type size: int.
:param noise_model: for mixed noise likelihood, the noise model to use in the samples.
:type noise_model: integer.
:returns: Ysim: set of simulations,
:rtype: np.ndarray (D x N x samples) (if D==1 we flatten out the first dimension)
"""
fsim = self.posterior_samples_f(X, size, **predict_kwargs)
if likelihood is None:
likelihood = self.likelihood
if fsim.ndim == 3:
for d in range(fsim.shape[1]):
fsim[:, d] = likelihood.samples(fsim[:, d], Y_metadata=Y_metadata)
else:
fsim = likelihood.samples(fsim, Y_metadata=Y_metadata)
return fsim
def get_fmin(self):
return np.min(self._Y)
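The final step of `_calculate_weights` converts per-bootstrap-sample ranking losses into ensemble weights: each sample contributes one unit of weight, shared equally among the models with minimal loss. A compact NumPy sketch (`rank_weights` is a hypothetical helper):

```python
import numpy as np

def rank_weights(ranking_loss):
    """Turn per-sample ranking losses into ensemble weights.

    ranking_loss: (n_models, n_samples). For each bootstrap sample, the
    model(s) with minimal loss share one unit of weight; weights are the
    proportion of samples each model wins, as in `_calculate_weights`.
    """
    n_models, n_samples = ranking_loss.shape
    best = np.zeros(n_models)
    minima = ranking_loss.min(axis=0)
    for j in range(n_samples):
        winners = np.where(ranking_loss[:, j] == minima[j])[0]
        best[winners] += 1.0 / len(winners)   # split ties fairly
    return best / n_samples

loss = np.array([[0.0, 2.0, 1.0],
                 [1.0, 0.0, 1.0]])  # each model wins one sample; one tie
w = rank_weights(loss)
```

Here model 0 wins sample 0, model 1 wins sample 1, and they tie on sample 2, so the weight mass splits evenly.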
================================================
FILE: transopt/optimizer/model/sgpt.py
================================================
import copy
from typing import Dict, List, Sequence, Union
import GPy
import numpy as np
from GPy.kern import RBF, Kern
from transopt.agent.registry import model_registry
from transopt.optimizer.model.gp import GP
from transopt.optimizer.model.model_base import Model
def roll_col(X: np.ndarray, shift: int) -> np.ndarray:
"""
Rotate columns to right by shift.
"""
return np.concatenate((X[:, -shift:], X[:, :-shift]), axis=1)
@model_registry.register("SGPT")
class SGPT(Model):
def __init__(
self,
kernel: Kern = None,
noise_variance: float = 1.0,
normalize: bool = True,
Seed = 0,
bandwidth: float = 1,
**options: dict,
):
super().__init__()
self._noise_variance = noise_variance
self._metadata = {}
self._source_gps = {}
self._source_gp_weights = {}
self._normalize = normalize
self.Seed = Seed
self.rng = np.random.RandomState(self.Seed)
self.bandwidth = bandwidth
self._X = None  # target observations; set by fit()
self._Y = None
self._target_model = None
self._target_model_weight = 1
def _meta_fit_single_gp(
self,
X : np.ndarray,
Y : np.ndarray,
optimize: bool,
) -> GP:
"""Train a new source GP on `data`.
Args:
data: The source dataset.
optimize: Switch to run hyperparameter optimization.
Returns:
The newly trained GP.
"""
self.n_features = X.shape[1]
kernel = RBF(self.n_features, ARD=True)
new_gp = GP(
kernel, noise_variance=self._noise_variance
)
new_gp.fit(
X = X,
Y = Y,
optimize = optimize,
)
return new_gp
def meta_fit(self,
source_X : List[np.ndarray],
source_Y : List[np.ndarray],
optimize: Union[bool, Sequence[bool]] = True):
# metadata, _ = SourceSelection.the_k_nearest(source_datasets)
self._metadata = {'X': source_X, 'Y':source_Y}
self._source_gps = {}
assert isinstance(optimize, bool) or isinstance(optimize, list)
if isinstance(optimize, list):
assert len(source_X) == len(optimize)
optimize_flag = copy.copy(optimize)
if isinstance(optimize_flag, bool):
optimize_flag = [optimize_flag] * len(source_X)
for i in range(len(source_X)):
new_gp = self._meta_fit_single_gp(
source_X[i],
source_Y[i],
optimize=optimize_flag[i],
)
self._source_gps[i] = new_gp
self._calculate_weights()
def fit(self,
X: np.ndarray,
Y: np.ndarray,
optimize: bool = False):
self._X = copy.deepcopy(X)
self._Y = copy.deepcopy(Y)
self.n_samples, n_features = self._X.shape
if self.n_features != n_features:
raise ValueError("Number of features in model and input data mismatch.")
kern = GPy.kern.RBF(self.n_features, ARD=False)
self._target_model = GPy.models.GPRegression(self._X, self._Y, kernel=kern)
self._target_model['Gaussian_noise.*variance'].constrain_bounded(1e-9, 1e-3)
try:
self._target_model.optimize_restarts(num_restarts=1, verbose=False, robust=True)
except np.linalg.LinAlgError:
print('Error: np.linalg.LinAlgError during target model optimization')
self._calculate_weights()
def predict(self, X, return_full: bool = False, with_noise: bool = False):
X_test = X
n_models = len(self._source_gp_weights)
if self._target_model_weight > 0:
n_models += 1
n_sample = X_test.shape[0]
means = np.empty((n_models, n_sample, 1))
weights = np.empty((n_models, n_sample))
if not return_full:
vars_ = np.empty((n_models, n_sample, 1))
else:
vars_ = np.empty((n_models, n_sample, n_sample))
for task_uid, weight in enumerate(self._source_gp_weights):
means[task_uid], vars_[task_uid] = self._source_gps[task_uid].predict(X_test)
weights[task_uid] = weight
if self._target_model_weight > 0:
means[-1], vars_[-1] = self._target_model.predict(X_test)
weights[-1] = self._target_model_weight
weights = weights[:,:,np.newaxis]
mean = np.sum(weights * means, axis=0)
# Note: only the target model's predictive variance is returned, not a mixture variance.
return mean, vars_[-1]
def Epanechnikov_kernel(self, X1, X2):
diff_matrix = X1 - X2
u = np.linalg.norm(diff_matrix, ord=2) / self.bandwidth**2  # normalized distance
if u < 1:
weight = 0.75 * (1 - u**2)  # Epanechnikov kernel weight
else:
weight = 0
return weight
def _calculate_weights(self, alpha: float = 0.0):
if self._X is None:
weight = 1 / len(self._source_gps)
self._source_gp_weights = [weight for task_uid in self._source_gps]
self._target_model_weight = 0
return
predictions = []
for model_idx in range(len(self._source_gps)):
model = self._source_gps[model_idx]
predictions.append(model.predict(self._X)[0].flatten()) # ndarray(n,)
predictions.append(self._target_model.predict(self._X)[0].flatten())
predictions = np.array(predictions)
bootstrap_indices = self.rng.choice(predictions.shape[1],
size=(self.n_samples, predictions.shape[1]),
replace=True)
bootstrap_predictions = []
bootstrap_targets = self._Y[bootstrap_indices].reshape((self.n_samples, len(self._Y)))
for m in range(len(self._source_gps) + 1):
bootstrap_predictions.append(predictions[m, bootstrap_indices])
ranking_losses = np.zeros((len(self._source_gps) + 1, self.n_samples))
for i in range(len(self._source_gps)):
for j in range(1, len(self._Y)):
ranking_losses[i] += np.sum(
(
~(roll_col(bootstrap_predictions[i], j) < bootstrap_predictions[i])
^ (roll_col(bootstrap_targets, j) < bootstrap_targets)
), axis=1
)
for j in range(1, len(self._Y)):
ranking_losses[-1] += np.sum(
(
~((roll_col(bootstrap_predictions[-1], j) < bootstrap_targets)
^ (roll_col(bootstrap_targets, j) < bootstrap_targets))
), axis=1
)
total_compare = len(self._Y) * (len(self._Y) - 1)
ranking_loss = np.array(ranking_losses) / total_compare
weights = [self.Epanechnikov_kernel(ranking_loss[task_uid], ranking_loss[-1]) for task_uid in self._source_gps]
weights.append(1.0)
weights = np.array(weights)/np.sum(weights)
self._source_gp_weights = [weights[task_uid] for task_uid in self._source_gps]
self._target_model_weight = weights[-1]
def posterior_samples_f(self, X, size=10, **predict_kwargs):
"""
Samples the posterior GP at the points X.
:param X: The points at which to take the samples.
:type X: np.ndarray (Nnew x self.input_dim)
:param size: the number of a posteriori samples.
:type size: int.
:returns: set of simulations
:rtype: np.ndarray (Nnew x D x samples)
"""
predict_kwargs["full_cov"] = True # Always use the full covariance for posterior samples.
m, v = self._raw_predict(X, **predict_kwargs)
def sim_one_dim(m, v):
return np.random.multivariate_normal(m, v, size).T
return sim_one_dim(m.flatten(), v)[:, np.newaxis, :]
def posterior_samples(self, X, size=10, Y_metadata=None, likelihood=None, **predict_kwargs):
"""
Samples the posterior GP at the points X.
:param X: the points at which to take the samples.
:type X: np.ndarray (Nnew x self.input_dim.)
:param size: the number of a posteriori samples.
:type size: int.
:param likelihood: likelihood used to add noise to the samples; defaults to the model's own likelihood.
:returns: Ysim: set of simulations,
:rtype: np.ndarray (D x N x samples) (if D==1 we flatten out the first dimension)
"""
fsim = self.posterior_samples_f(X, size, **predict_kwargs)
if likelihood is None:
likelihood = self.likelihood
if fsim.ndim == 3:
for d in range(fsim.shape[1]):
fsim[:, d] = likelihood.samples(fsim[:, d], Y_metadata=Y_metadata)
else:
fsim = likelihood.samples(fsim, Y_metadata=Y_metadata)
return fsim
def get_fmin(self):
return np.min(self._Y)
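SGPT's `predict` combines the per-model predictive means through the normalized weights computed in `_calculate_weights`. A minimal numpy sketch of that weighted mixture; the means and raw weights below are hypothetical placeholders:

```python
import numpy as np

# Hypothetical predictive means of 3 models (2 source GPs + target) at 4 points.
means = np.array([[0.1, 0.2, 0.3, 0.4],
                  [0.0, 0.1, 0.2, 0.3],
                  [0.4, 0.4, 0.4, 0.4]])
raw_weights = np.array([0.5, 0.3, 1.0])    # the target model's raw weight is 1.0
weights = raw_weights / raw_weights.sum()  # normalize, as in _calculate_weights

# Weighted mixture of means, as in SGPT.predict (weights broadcast over points).
mixture_mean = np.sum(weights[:, np.newaxis] * means, axis=0)
```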
================================================
FILE: transopt/optimizer/model/smsego.py
================================================
import GPy
import numpy as np
from sklearn.preprocessing import StandardScaler
from transopt.agent.registry import model_registry
from transopt.optimizer.model.model_base import Model
@model_registry.register("SMSEGO")
class SMSEGO(Model):
def __init__(self, seed=0, normalize=True, **options):
super().__init__()
self.seed = seed
self.normalize = normalize
self.models = []
self._x_normalizer = StandardScaler() if normalize else None
self._y_normalizer = StandardScaler() if normalize else None
self._options = options
np.random.seed(self.seed)
def fit(self, X, Y):
self._X = np.copy(X)
self._Y = np.copy(Y)
if self.normalize:
X = self._x_normalizer.fit_transform(X)
Y = self._y_normalizer.fit_transform(Y.T).T # Transpose Y to normalize across objectives
self._create_model(X, Y)
def predict(self, X, full_cov=False):
return self._make_prediction(X, full_cov)
def _create_model(self, X, Y):
self.models = []  # rebuild from scratch on every fit
self.num_objective = Y.shape[0]  # Y is laid out as (n_objectives, n_samples)
for i in range(self.num_objective):
kernel = GPy.kern.RBF(input_dim=X.shape[1])
model = GPy.models.GPRegression(X, Y[i][:, np.newaxis], kernel=kernel)
model[".*Gaussian_noise.variance"].constrain_fixed(1.0e-4)
model[".*rbf.variance"].constrain_fixed(1.0)
self.models.append(model)
def _update_model(self, X, Y):
if not self.models:
self._create_model(X, Y)
else:
for i, model in enumerate(self.models):
model.set_XY(X, Y[i][:, np.newaxis])
try:
for model in self.models:
model.optimize_restarts(num_restarts=1, verbose=self._options.get("verbose", False), robust=True)
except np.linalg.LinAlgError as e:
print("Error during model optimization: ", e)
def _make_prediction(self, X, full_cov=False):
if len(X.shape) == 1:
X = X[np.newaxis, :]
pred_mean = np.zeros((X.shape[0], 0))
pred_var = np.zeros((X.shape[0], 0)) if not full_cov else np.zeros((0, X.shape[0], X.shape[0]))
for model in self.models:
mean, var = model.predict(X, full_cov=full_cov)
pred_mean = np.append(pred_mean, mean, axis=1)
if full_cov:
pred_var = np.append(pred_var, [var], axis=0)
else:
pred_var = np.append(pred_var, var, axis=1)
return pred_mean, pred_var
================================================
FILE: transopt/optimizer/model/utils.py
================================================
import itertools
from typing import List, Union, Tuple
import numpy as np
import scipy
from GPy.kern import Fixed, BasisFuncKernel
def is_pd(a: np.ndarray) -> bool:
"""Check whether matrix `a` is positive definite via Cholesky decomposition.
Args:
a: Input matrix.
Returns:
`True` if input matrix is positive-definite, `False` otherwise.
"""
try:
_ = np.linalg.cholesky(a)
return True
except np.linalg.LinAlgError:
return False
def nearest_pd(a: np.ndarray) -> np.ndarray:
"""Calculate the nearest positive-definite matrix to a given symmetric matrix `a`.
Nearest is defined by the Frobenius norm.
Args:
a: Symmetric matrix. `shape = (n, n)`
Returns:
The nearest positive-definite matrix to the input symmetric matrix `a`.
`shape = (n, n)`
"""
# compute eigendecomposition of symmetric matrix a
w, v = np.linalg.eigh(a)
# account for floating-point accuracy
spacing = np.spacing(np.linalg.norm(a))
# clip the eigenvalues at zero
wp = np.clip(w, spacing, None)
return np.dot(v, np.dot(np.diag(wp), np.transpose(v)))
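`nearest_pd` repairs an indefinite symmetric matrix by clipping its eigenvalues just above zero. A standalone check on a 2x2 matrix with eigenvalues 3 and -1 (function body restated for self-containment):

```python
import numpy as np

def nearest_pd(a):
    # Eigendecompose, clip eigenvalues just above zero, reassemble
    # (same approach as nearest_pd above).
    w, v = np.linalg.eigh(a)
    spacing = np.spacing(np.linalg.norm(a))
    wp = np.clip(w, spacing, None)
    return v @ np.diag(wp) @ v.T

a = np.array([[1.0, 2.0],
              [2.0, 1.0]])     # symmetric, eigenvalues 3 and -1: not PD
a_pd = nearest_pd(a)
min_eig = np.linalg.eigvalsh(a_pd).min()   # negative eigenvalue clipped away
```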
def compute_cholesky(matrix: np.ndarray) -> np.ndarray:
"""Calculate the Cholesky decomposition of a matrix.
If the matrix is singular, a small constant is added to the diagonal of the matrix.
This Method is therefore useful for the calculation of GP posteriors.
Args:
matrix: The input matrix. `shape = (n_points, n_points)`
Returns:
The Cholesky decomposition stored in the lower triangle.
`shape = (n_points, n_points)`
"""
assert len(matrix.shape) <= 2, (
"The matrix has more than two input dimensions. Cholesky decomposition"
"impossible."
)
assert (
matrix.shape[0] == matrix.shape[1]
), "The matrix is not square. Cholesky decomposition impossible."
_matrix = np.copy(matrix) # to avoid modifying the input
for k in itertools.count(start=1):
try:
chol = scipy.linalg.cholesky(_matrix, lower=True)
except scipy.linalg.LinAlgError:
# Increase eigenvalues of matrix
np.fill_diagonal(_matrix, _matrix.diagonal() + 10 ** k * 1e-8)
else:
return chol
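The retry-with-jitter idea in `compute_cholesky` can be sketched with numpy alone (hypothetical standalone version; the original uses `scipy.linalg.cholesky`):

```python
import numpy as np

def jittered_cholesky(matrix, jitter0=1e-8):
    # Retry Cholesky with an exponentially growing diagonal jitter,
    # mirroring compute_cholesky above (numpy-only sketch).
    m = np.copy(matrix)
    k = 1
    while True:
        try:
            return np.linalg.cholesky(m)
        except np.linalg.LinAlgError:
            m[np.diag_indices_from(m)] += 10 ** k * jitter0
            k += 1

singular = np.ones((3, 3))        # rank-1: plain Cholesky raises LinAlgError
L = jittered_cholesky(singular)   # succeeds after a jitter step
```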
class FixedKernel(Fixed):
"""Fixed covariance kernel. Serializable version of the Fixed Kernel from `GPy`.
Serialization is required to initialize a `gpy_adapter` `Model` using this kernel.
"""
def __init__(
self,
input_dim: int,
covariance_matrix: np.ndarray,
active_dims: List[int] = None,
name="PosteriorCov",
):
"""Initialize the kernel.
Args:
input_dim: Input dimension of the training data.
covariance_matrix: The fixed covariance matrix.
active_dims: Active dimensions.
name: Name of the kernel.
"""
super(FixedKernel, self).__init__(
input_dim=input_dim,
variance=1.0,
covariance_matrix=covariance_matrix,
active_dims=active_dims,
name=name,
)
self.variance.fix()
def to_dict(self) -> dict:
"""Save the kernel as a dictionary."""
input_dict = super(Fixed, self)._save_to_input_dict()
input_dict["covariance_matrix"] = self.fixed_K
input_dict["class"] = "GPy.kern.Fixed"
input_dict.pop("useGPU")
return input_dict
def compute_alpha(model: "GP", x) -> np.ndarray:
r"""Calculate the $\alpha(x)$ Woodbury vector used for computing the boosted
covariance.
$$
\alpha(x) = k(x, X)\left(k(X, X) + \sigma^2 \mathbb{1}\right)^{-1},
$$
where $k$ is the kernel of `model`, $X$ is the training data of `model`, and
$\sigma$ is the standard deviation of the observational noise.
Args:
model: The Gaussian-process model.
x: The input data. `shape = (n_points, n_features)`
Returns:
The $\alpha$ vector. `shape = (n_points, n_training_points)`
"""
L = model._gpy_model.posterior.woodbury_chol
X = model.X
k = model.compute_kernel(X, x)
return scipy.linalg.solve_triangular(
L.T, scipy.linalg.solve_triangular(L, k, lower=True)
)
class CrossTaskKernel(BasisFuncKernel):
"""A kernel that is one iff the X-task corresponds to one of the `task_indices`."""
def __init__(
self,
task_indices: Union[Tuple[int, int], int, np.ndarray],
index_dim: int,
variance=1.0,
name="task_domain",
):
super().__init__(
input_dim=1,
variance=variance,
active_dims=(index_dim,),
ARD=False,
name=name,
)
self.task_indices = np.atleast_2d(np.asarray(task_indices, dtype=int))
assert self.task_indices.size >= 1, "Need at least one task."
def _phi(self, X: np.ndarray) -> np.ndarray:
# atol maps our floats to tasks
is_domain_task = np.isclose(X, self.task_indices, atol=0.5, rtol=0)
return is_domain_task.any(axis=-1, keepdims=True)
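`CrossTaskKernel._phi` maps a float-valued task-index column to a boolean task-membership indicator via `np.isclose` with `atol=0.5`. The core test in isolation:

```python
import numpy as np

task_indices = np.atleast_2d(np.asarray([0, 2], dtype=int))  # active tasks
X = np.array([[0.0], [1.0], [2.0], [2.4]])  # task-index column as floats

# Same membership test as CrossTaskKernel._phi: |x - index| <= 0.5.
is_domain_task = np.isclose(X, task_indices, atol=0.5, rtol=0)
phi = is_domain_task.any(axis=-1, keepdims=True)
```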
================================================
FILE: transopt/optimizer/normalizer/__init__.py
================================================
from transopt.optimizer.normalizer.standerd import Standard_normalizer
from transopt.optimizer.normalizer.normalizer_base import NormalizerBase
================================================
FILE: transopt/optimizer/normalizer/normalizer_base.py
================================================
from abc import abstractmethod, ABC
from typing import Dict, Hashable
import numpy as np
class NormalizerBase(ABC):
def __init__(self, config):
self.config = config
@abstractmethod
def fit(self, X, Y):
raise NotImplementedError
@abstractmethod
def transform(self, X = None, Y = None):
raise NotImplementedError
@abstractmethod
def inverse_transform(self, X = None, Y = None):
raise NotImplementedError
================================================
FILE: transopt/optimizer/normalizer/standerd.py
================================================
import numpy as np
from sklearn.preprocessing import StandardScaler
from transopt.agent.registry import normalizer_registry
from transopt.optimizer.normalizer.normalizer_base import NormalizerBase
# class XScaler:
# def __init__(self, ranges):
# self.ranges = np.array(ranges)
# self.min = self.ranges[:, 0]
# self.max = self.ranges[:, 1]
# def transform(self, values):
# values = np.array(values)
# scaled_values = 2 * (values - self.min) / (self.max - self.min) - 1
# return scaled_values
# def inverse_transform(self, scaled_values):
# scaled_values = np.array(scaled_values)
# values = (scaled_values + 1) / 2 * (self.max - self.min) + self.min
# return values
@normalizer_registry.register("Standard")
class Standard_normalizer(NormalizerBase):
def __init__(self, config, metadata = None, metadata_info = None):
self.y_normalizer = StandardScaler()
super(Standard_normalizer, self).__init__(config)
def fit(self, X, Y):
self.y_normalizer.fit(Y)
def transform(self, X = None, Y = None):
# if X is not None:
# X = self.x_normalizer.transform(X)
if Y is not None:
Y = self.y_normalizer.transform(Y)
return X, Y
def inverse_transform(self, X = None, Y = None):
# if X is not None:
# X = self.x_normalizer.inverse_transform(X)
if Y is not None:
Y = self.y_normalizer.inverse_transform(Y)
return X, Y
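`Standard_normalizer` standardizes only the objectives and can invert the transform. The same roundtrip with plain numpy (a sketch, not the sklearn-backed implementation):

```python
import numpy as np

Y = np.array([[1.0], [2.0], [3.0], [4.0]])
mean, std = Y.mean(axis=0), Y.std(axis=0)

Y_scaled = (Y - mean) / std     # transform: zero mean, unit variance
Y_back = Y_scaled * std + mean  # inverse_transform: recovers Y exactly
```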
================================================
FILE: transopt/optimizer/optimizer_base/EvoOptimizerBase.py
================================================
import abc
import numpy as np
import ConfigSpace
import math
from typing import Union, Dict, List
from transopt.optimizer.optimizer_base import OptimizerBase
import GPyOpt
from transopt.utils.serialization import vectors_to_ndarray, output_to_ndarray
from transopt.utils.Visualization import visual_oned, visual_contour
class EVOBase(OptimizerBase):
"""
The abstract Model for Evolutionary Optimization
"""
def __init__(self, config):
super(EVOBase, self).__init__(config=config)
self._X = np.empty((0,)) # Initializes an empty ndarray for input vectors
self._Y = np.empty((0,))
self.config = config
self.search_space = None
self.design_space = None
self.mapping = None
self.ini_num = None
self._data_handler = None
self.population = None
self.pop_size = None
================================================
FILE: transopt/optimizer/optimizer_base/__init__.py
================================================
# from optimizer.optimizer_base.optimizerBase import OptimizerBase
# from optimizer.optimizer_base.bo_base import BOBase
================================================
FILE: transopt/optimizer/optimizer_base/base.py
================================================
import abc
from typing import List, Dict, Union
class OptimizerBase(abc.ABC, metaclass=abc.ABCMeta):
"""Abstract base class for the optimizers in the benchmark. This creates a common API across all packages.
"""
# Every implementation package needs to specify this static variable, e.g., "primary_import=opentuner"
primary_import = None
def __init__(self, config, **kwargs):
"""Build wrapper class to use an optimizer in benchmark.
Parameters
----------
config : dict-like of dict-like
Configuration of the optimization variables. See API description.
"""
self.config = config
# self.verbose = config['verbose']
# self.optimizer_name = config['optimizer_name']
# self.exp_path = config['save_path']
@abc.abstractmethod
def suggest(self, n_suggestions:Union[None, int] = None)->List[Dict]:
"""Get a suggestion from the optimizer.
Parameters
----------
n_suggestions : int
Desired number of parallel suggestions in the output
Returns
-------
next_guess : list of dict
List of `n_suggestions` suggestions to evaluate the objective
function. Each suggestion is a dictionary where each key
corresponds to a parameter being optimized.
"""
pass
@abc.abstractmethod
def observe(self, input_vectors: Union[List[Dict], Dict], output_value: Union[List[Dict], Dict]) -> None:
"""Send an observation of a suggestion back to the optimizer.
Parameters
----------
X : list of dict-like
Places where the objective function has already been evaluated.
Each suggestion is a dictionary where each key corresponds to a
parameter being optimized.
y : array-like, shape (n,)
Corresponding values where objective has been evaluated
"""
pass
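To illustrate the `suggest`/`observe` contract, here is a hypothetical, self-contained random-search optimizer (not part of TransOPT; `RandomSearch` and its `config` format are invented for illustration):

```python
import random
from typing import Dict, List, Union

class RandomSearch:
    """Hypothetical OptimizerBase-style optimizer: `config` maps each
    parameter name to a (low, high) range (invented for illustration)."""

    def __init__(self, config):
        self.config = config
        self.history = []  # (input_vector, output_value) pairs

    def suggest(self, n_suggestions: Union[None, int] = None) -> List[Dict]:
        # Return `n_suggestions` uniformly sampled configurations.
        n = n_suggestions or 1
        return [{name: random.uniform(lo, hi)
                 for name, (lo, hi) in self.config.items()}
                for _ in range(n)]

    def observe(self, input_vectors, output_value) -> None:
        # Record the evaluated point; a model-based optimizer would refit here.
        self.history.append((input_vectors, output_value))

opt = RandomSearch({"x": (-5.0, 5.0), "y": (0.0, 1.0)})
batch = opt.suggest(3)
opt.observe(batch[0], {"f": batch[0]["x"] ** 2})
```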
================================================
FILE: transopt/optimizer/optimizer_base/bo.py
================================================
import abc
import copy
import math
from typing import Dict, List, Union
import GPyOpt
import numpy as np
from transopt.optimizer.acquisition_function.sequential import Sequential
from transopt.optimizer.optimizer_base.base import OptimizerBase
from transopt.space.fidelity_space import FidelitySpace
from transopt.space.search_space import SearchSpace
from transopt.utils.serialization import (multioutput_to_ndarray,
output_to_ndarray)
class BO(OptimizerBase):
"""
The abstract Model for Bayesian Optimization
"""
def __init__(self, Refiner, Sampler, ACF, Pretrain, Model, Normalizer, config):
super(BO, self).__init__(config=config)
self._X = np.empty((0,)) # Initializes an empty ndarray for input vectors
self._Y = np.empty((0,))
self.config = config
self.search_space = None
self.ini_num = 10
self.SpaceRefiner = Refiner
self.Sampler = Sampler
self.ACF = ACF
self.Pretrain = Pretrain
self.Model = Model
self.Normalizer = Normalizer
self.ACF.link_model(model=self.Model)
self.MetaData = None
def link_task(self, task_name:str, search_space: SearchSpace):
self.task_name = task_name
self.search_space = search_space
self._X = np.empty((0,)) # Initializes an empty ndarray for input vectors
self._Y = np.empty((0,))
self.ACF.link_space(self.search_space)
self.evaluator = Sequential(self.ACF, batch_size=1)
def search_space_refine(self, metadata = None, metadata_info = None):
if self.SpaceRefiner is not None:
self.search_space = self.SpaceRefiner.refine_space(self.search_space)
self.ACF.link_space(self.search_space)
self.evaluator = Sequential(self.ACF)
def sample_initial_set(self, metadata = None, metadata_info = None):
return self.Sampler.sample(self.search_space, self.ini_num)
def pretrain(self, metadata = None, metadata_info = None):
if self.Pretrain:
self.Pretrain.set_data(metadata, metadata_info)
self.Pretrain.meta_train()
def meta_fit(self, metadata = None, metadata_info = None):
if metadata:
source_X = []
source_Y = []
for key, datasets in metadata.items():
data_info = metadata_info[key]
source_X.append(np.array([[data[var['name']] for var in data_info['variables']] for data in datasets]))
source_Y.append(np.array([[data[var['name']] for var in data_info['objectives']] for data in datasets]))
self.Model.meta_fit(source_X, source_Y)
def fit(self):
Y = copy.deepcopy(self._Y)
X = copy.deepcopy(self._X)
self.Model.fit(X, Y, optimize = True)
def suggest(self):
suggested_sample, acq_value = self.evaluator.compute_batch(None, context_manager=None)
# suggested_sample = self.search_space.zip_inputs(suggested_sample)
if self.Normalizer:
suggested_sample = self.Normalizer.inverse_transform(X=suggested_sample)[0]
return suggested_sample
def observe(self, X: np.ndarray, Y: List[Dict]) -> None:
# Check if the lists are empty and return if they are
if X.shape[0] == 0 or len(Y) == 0:
return
Y = np.array(output_to_ndarray(Y))
if self.Normalizer:
self.Normalizer.fit(X, Y)
X, Y = self.Normalizer.transform(X, Y)
self._X = np.vstack((self._X, X)) if self._X.size else X
self._Y = np.vstack((self._Y, Y)) if self._Y.size else Y
================================================
FILE: transopt/optimizer/pretrain/__init__.py
================================================
from transopt.optimizer.pretrain.deepkernelpretrain import DeepKernelPretrain
from transopt.optimizer.pretrain.hyper_bo import HyperBOPretrain
================================================
FILE: transopt/optimizer/pretrain/deepkernelpretrain.py
================================================
import copy
import os
import gpytorch
import numpy as np
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler
from transopt.agent.registry import pretrain_registry
from transopt.optimizer.pretrain.pretrain_base import PretrainBase
np.random.seed(1203)
RandomQueryGenerator = np.random.RandomState(413)
RandomSupportGenerator = np.random.RandomState(413)
RandomTaskGenerator = np.random.RandomState(413)
class Metric(object):
def __init__(self,prefix='train: '):
self.reset()
self.message=prefix + "loss: {loss:.2f} - noise: {log_var:.2f} - mse: {mse:.2f}"
def update(self,loss,noise,mse):
self.loss.append(loss.item())
self.noise.append(noise.item())
self.mse.append(mse.item())
def reset(self,):
self.loss = []
self.noise = []
self.mse = []
def report(self):
return self.message.format(loss=np.mean(self.loss),
log_var=np.mean(self.noise),
mse=np.mean(self.mse))
def get(self):
return {"loss":np.mean(self.loss),
"noise":np.mean(self.noise),
"mse":np.mean(self.mse)}
def totorch(x,device):
return torch.Tensor(x).to(device)
class MLP(nn.Module):
def __init__(self, input_size, hidden_size=[32,32,32,32], dropout=0.0):
super(MLP, self).__init__()
self.nonlinearity = nn.ReLU()
self.fc = nn.ModuleList([nn.Linear(in_features=input_size, out_features=hidden_size[0])])
for d_out in hidden_size[1:]:
self.fc.append(nn.Linear(in_features=self.fc[-1].out_features, out_features=d_out))
self.out_features = hidden_size[-1]
self.dropout = nn.Dropout(dropout)
def forward(self,x):
for fc in self.fc[:-1]:
x = fc(x)
x = self.dropout(x)
x = self.nonlinearity(x)
x = self.fc[-1](x)
x = self.dropout(x)
return x
class ExactGPLayer(gpytorch.models.ExactGP):
def __init__(self, train_x, train_y, likelihood,config,dims ):
super(ExactGPLayer, self).__init__(train_x, train_y, likelihood)
self.mean_module = gpytorch.means.ConstantMean()
if(config["kernel"]=='rbf' or config["kernel"]=='RBF'):
self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel(ard_num_dims=dims if config["ard"] else None))
elif(config["kernel"]=='matern'):
self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.MaternKernel(nu=config["nu"],ard_num_dims=dims if config["ard"] else None))
else:
raise ValueError("[ERROR] the kernel '" + str(config["kernel"]) + "' is not supported for regression, use 'rbf' or 'spectral'.")
def forward(self, x):
mean_x = self.mean_module(x)
covar_x = self.covar_module(x)
return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
@pretrain_registry.register("DeepKernelPretrain")
class DeepKernelPretrain(nn.Module):
def __init__(self, config = {}):
super(DeepKernelPretrain, self).__init__()
## GP parameters
if len(config) == 0:
self.config = {"kernel": "matern", 'ard': False, "nu": 2.5, 'hidden_size': [32,32,32,32],
'n_inner_steps': 1, 'test_batch_size':1, 'batch_size':1, 'seed':0, 'checkpoint_path':'./external/model/FSBO/'}
else:
self.config = config
self.batch_size = self.config['batch_size']
self.test_batch_size = self.config['test_batch_size']
self.n_inner_steps = self.config['n_inner_steps']
self.checkpoint_path = self.config['checkpoint_path']
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
self.hidden_size = [32,32,32,32]
self.kernel_config = {"kernel": self.config['kernel'], 'ard': self.config['ard'], "nu": self.config['nu']}
self.Seed = self.config['seed']
self.train_metrics = Metric()
self.valid_metrics = Metric(prefix="valid: ")
self.mse = nn.MSELoss()
self.curr_valid_loss = np.inf
os.makedirs(self.checkpoint_path,exist_ok=True)
print(self)
def set_data(self, metadata, metadata_info= None):
train_data = {}
for dataset_name, data in metadata.items():
objectives = metadata_info[dataset_name]["objectives"]
obj = objectives[0]["name"]
obj_data = [d[obj] for d in data]
var_data = [[d[var["name"]] for var in metadata_info[dataset_name]["variables"]] for d in data]
self.input_size = metadata_info[dataset_name]['num_variables']
train_data[dataset_name] = {'X':np.array(var_data), 'y':np.array(obj_data)[:, np.newaxis]}
self.train_data = train_data
self.feature_extractor = MLP(self.input_size, hidden_size = self.hidden_size).to(self.device)
self.get_tasks()
def get_tasks(self,):
self.tasks = list(self.train_data.keys())
def get_model_likelihood_mll(self, train_size):
train_x=torch.ones(train_size, self.feature_extractor.out_features).to(self.device)
train_y=torch.ones(train_size).to(self.device)
likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = ExactGPLayer(train_x = train_x, train_y = train_y, likelihood = likelihood, config = self.kernel_config, dims = self.feature_extractor.out_features)
self.model = model.to(self.device)
self.likelihood = likelihood.to(self.device)
self.mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model).to(self.device)
def epoch_end(self):
RandomTaskGenerator.shuffle(self.tasks)
def meta_train(self, epochs = 50000, lr = 0.0001):
self.get_model_likelihood_mll(self.batch_size)
optimizer = torch.optim.Adam(self.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs, eta_min=1e-7)
for epoch in range(epochs):
self.train_loop(epoch, optimizer, scheduler)
self.save_checkpoint(self.checkpoint_path + f'Seed_{self.Seed}_{len(self.tasks)}')
def train_loop(self, epoch, optimizer, scheduler=None):
self.epoch_end()
assert(self.training)
for task in self.tasks:
inputs, labels = self.get_batch(task)
for _ in range(self.n_inner_steps):
optimizer.zero_grad()
z = self.feature_extractor(inputs)
self.model.set_train_data(inputs=z, targets=labels, strict=False)
predictions = self.model(z)
loss = -self.mll(predictions, self.model.train_targets)
loss.backward()
optimizer.step()
mse = self.mse(predictions.mean, labels)
self.train_metrics.update(loss,self.model.likelihood.noise,mse)
if scheduler:
scheduler.step()
training_results = self.train_metrics.get()
validation_results = self.valid_metrics.get()
# for k,v in validation_results.items():
# self.valid_summary_writer.add_scalar(k, v, epoch)
self.feature_extractor.train()
self.likelihood.train()
self.model.train()
if validation_results["loss"] < self.curr_valid_loss:
self.save_checkpoint(os.path.join(self.checkpoint_path,"weights"))
self.curr_valid_loss = validation_results["loss"]
self.valid_metrics.reset()
self.train_metrics.reset()
def test_loop(self, task, train):
(x_support, y_support),(x_query,y_query) = self.get_support_and_queries(task,train)
z_support = self.feature_extractor(x_support).detach()
self.model.set_train_data(inputs=z_support, targets=y_support, strict=False)
self.model.eval()
self.feature_extractor.eval()
self.likelihood.eval()
with torch.no_grad():
z_query = self.feature_extractor(x_query).detach()
pred = self.likelihood(self.model(z_query))
loss = -self.mll(pred, y_query)
lower, upper = pred.confidence_region() #2 standard deviations above and below the mean
mse = self.mse(pred.mean, y_query)
return mse,loss
def get_batch(self,task):
Lambda,response = np.array(self.train_data[task]["X"]), MinMaxScaler().fit_transform(np.array(self.train_data[task]["y"])).reshape(-1,)
card, dim = Lambda.shape
support_ids = RandomSupportGenerator.choice(np.arange(card),
replace=False,size= min(self.batch_size, card))
inputs,labels = Lambda[support_ids], response[support_ids]
inputs,labels = totorch(inputs,device=self.device), totorch(labels.reshape(-1,),device=self.device)
return inputs, labels
def get_support_and_queries(self,task, train=False):
hpo_data = self.valid_data if not train else self.train_data
Lambda,response = np.array(hpo_data[task]["X"]), MinMaxScaler().fit_transform(np.array(hpo_data[task]["y"])).reshape(-1,)
card, dim = Lambda.shape
support_ids = RandomSupportGenerator.choice(np.arange(card),
replace=False,size=min(self.batch_size, card))
diff_set = np.setdiff1d(np.arange(card),support_ids)
query_ids = RandomQueryGenerator.choice(diff_set,replace=False,size=min(self.batch_size, len(diff_set)))
support_x,support_y = Lambda[support_ids], response[support_ids]
query_x,query_y = Lambda[query_ids], response[query_ids]
return (totorch(support_x,self.device),totorch(support_y.reshape(-1,),self.device)),\
(totorch(query_x,self.device),totorch(query_y.reshape(-1,),self.device))
def save_checkpoint(self, checkpoint):
gp_state_dict = self.model.state_dict()
likelihood_state_dict = self.likelihood.state_dict()
nn_state_dict = self.feature_extractor.state_dict()
torch.save({'gp': gp_state_dict, 'likelihood': likelihood_state_dict, 'net':nn_state_dict}, checkpoint)
def load_checkpoint(self, checkpoint):
ckpt = torch.load(checkpoint)
self.model.load_state_dict(ckpt['gp'])
self.likelihood.load_state_dict(ckpt['likelihood'])
self.feature_extractor.load_state_dict(ckpt['net'])
================================================
FILE: transopt/optimizer/pretrain/get_pretrain.py
================================================
from transopt.agent.registry import pretrain_registry
def get_pretrain(pretrain_name, **kwargs):
"""Create the optimizer object."""
pretrain_class = pretrain_registry.get(pretrain_name)
config = kwargs
if pretrain_class is not None:
pretrain_method = pretrain_class(config=config)
else:
print(f"Refiner '{pretrain_name}' not found in the registry.")
raise NameError
return pretrain_method
================================================
FILE: transopt/optimizer/pretrain/hyper_bo.py
================================================
from transopt.agent.registry import pretrain_registry
from transopt.optimizer.pretrain.pretrain_base import PretrainBase
@pretrain_registry.register("hyperbo")
class HyperBOPretrain(PretrainBase):
def __init__(self, config) -> None:
super().__init__(config)
================================================
FILE: transopt/optimizer/pretrain/pretrain_base.py
================================================
class PretrainBase:
def __init__(self, config) -> None:
self.config = config
================================================
FILE: transopt/optimizer/refiner/__init__.py
================================================
from transopt.optimizer.refiner.box import BoxRefiner
from transopt.optimizer.refiner.ellipse import EllipseRefiner
from transopt.optimizer.refiner.prune import Prune
================================================
FILE: transopt/optimizer/refiner/box.py
================================================
from transopt.optimizer.refiner.refiner_base import RefinerBase
from transopt.agent.registry import space_refiner_registry
@space_refiner_registry.register("box")
class BoxRefiner(RefinerBase):
def __init__(self, config) -> None:
super().__init__(config)
================================================
FILE: transopt/optimizer/refiner/ellipse.py
================================================
from transopt.optimizer.refiner.refiner_base import RefinerBase
from transopt.agent.registry import space_refiner_registry
@space_refiner_registry.register("ellipse")
class EllipseRefiner(RefinerBase):
def __init__(self, config) -> None:
super().__init__(config)
================================================
FILE: transopt/optimizer/refiner/get_refiner.py
================================================
from transopt.agent.registry import space_refiner_registry
def get_refiner(refiner_name, **kwargs):
    """Create the space-refiner object registered under ``refiner_name``."""
    refiner_class = space_refiner_registry.get(refiner_name)
    if refiner_class is None:
        raise NameError(f"Refiner '{refiner_name}' not found in the registry.")
    return refiner_class(config=kwargs)
================================================
FILE: transopt/optimizer/refiner/prune.py
================================================
from transopt.optimizer.refiner.refiner_base import RefinerBase
from transopt.agent.registry import space_refiner_registry
@space_refiner_registry.register("Prune")
class Prune(RefinerBase):
def __init__(self, config) -> None:
super().__init__(config)
    def refine(self, search_space, metadata=None):
        raise NotImplementedError("Refine method should be implemented by subclasses.")
def check_metadata_avaliable(self, metadata):
if metadata is None:
return False
return True
================================================
FILE: transopt/optimizer/refiner/refiner_base.py
================================================
class RefinerBase:
def __init__(self, config) -> None:
self.config = config
    def refine(self, search_space, metadata=None):
        raise NotImplementedError("Refine method should be implemented by subclasses.")
def check_metadata_avaliable(self, metadata):
if metadata is None:
return False
return True
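`BoxRefiner` and `EllipseRefiner` above have empty bodies, so the shape of a concrete `refine` implementation is not shown. As an illustration only (this is not TransOPT's actual algorithm), a box-style refiner could shrink each variable's range to the bounding box of promising historical points, padded by a margin:

```python
class BoxRefinerSketch:
    """Hypothetical box refiner for exposition: clip each range to the
    bounding box of the given points, padded by a small relative margin."""

    def __init__(self, margin=0.1):
        self.margin = margin

    def refine(self, ranges, good_points):
        # ranges: {name: (low, high)}; good_points: {name: [values, ...]}
        refined = {}
        for name, (low, high) in ranges.items():
            vals = good_points[name]
            pad = self.margin * (high - low)
            # Never expand beyond the original range.
            refined[name] = (max(low, min(vals) - pad),
                             min(high, max(vals) + pad))
        return refined


refiner = BoxRefinerSketch(margin=0.1)
print(refiner.refine({"x": (0.0, 10.0)}, {"x": [3.0, 4.0, 5.0]}))  # -> {'x': (2.0, 6.0)}
```

The clipping against the original range mirrors the invariant enforced by `SearchSpace.update_range`, which rejects any new range outside the original one.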
================================================
FILE: transopt/optimizer/sampler/__init__.py
================================================
from transopt.optimizer.sampler.random import RandomSampler
from transopt.optimizer.sampler.sobel import SobolSampler
from transopt.optimizer.sampler.lhs import LatinHypercubeSampler
================================================
FILE: transopt/optimizer/sampler/get_sampler.py
================================================
from transopt.agent.registry import sampler_registry
def get_sampler(sampler_name, **kwargs):
    """Create the sampler object registered under ``sampler_name``."""
    sampler_class = sampler_registry.get(sampler_name)
    if sampler_class is None:
        raise NameError(f"Sampler '{sampler_name}' not found in the registry.")
    # Sampler.__init__ requires n_samples; default to 1 here and let callers
    # adjust it later via change_n_samples().
    n_samples = kwargs.pop("n_samples", 1)
    return sampler_class(n_samples=n_samples, config=kwargs)
================================================
FILE: transopt/optimizer/sampler/gradient.py
================================================
================================================
FILE: transopt/optimizer/sampler/grid.py
================================================
import numpy as np
from transopt.optimizer.sampler.sampler_base import Sampler
from transopt.agent.registry import sampler_registry
# @sampler_registry.register("grid")
class GridSampler(Sampler):
def generate_grid_for_variable(self, var_range, is_discrete, steps):
if is_discrete:
if (var_range[1] - var_range[0] + 1) <= steps:
return np.arange(var_range[0], var_range[1] + 1)
else:
return np.linspace(
var_range[0], var_range[1], num=steps, endpoint=True
).round()
else:
return np.linspace(var_range[0], var_range[1], num=steps)
def sample(self, search_space, steps=5, metadata=None):
grids = []
for name in search_space.variables_order:
var_range = search_space.ranges[name]
is_discrete = search_space.var_discrete[name]
grid = self.generate_grid_for_variable(var_range, is_discrete, steps)
grids.append(grid)
mesh = np.meshgrid(*grids, indexing="ij")
sample_points = np.stack(mesh, axis=-1).reshape(
-1, len(search_space.variables_order)
)
return sample_points
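The grid construction above relies on NumPy's `meshgrid`; the same full-factorial logic can be sketched with only the standard library. The `grid_points` helper and its argument shapes are illustrative, not part of TransOPT's API:

```python
import itertools


def grid_points(ranges, discrete, steps=5):
    """Build a full-factorial grid over per-variable ranges.

    ranges: {name: (low, high)}; discrete: {name: bool}.
    Discrete axes enumerate every integer when they fit within `steps`.
    """
    axes = []
    for name, (low, high) in ranges.items():
        if discrete[name]:
            if (high - low + 1) <= steps:
                axes.append(list(range(low, high + 1)))
            else:
                axes.append([round(low + i * (high - low) / (steps - 1))
                             for i in range(steps)])
        else:
            axes.append([low + i * (high - low) / (steps - 1)
                         for i in range(steps)])
    # Cartesian product of all per-variable grids.
    return list(itertools.product(*axes))


pts = grid_points({"x": (0.0, 1.0), "k": (1, 3)}, {"x": False, "k": True}, steps=3)
print(len(pts))  # -> 9
```

Note the combinatorial cost: the number of points is `steps ** d`, which is why grid sampling is left unregistered above while random, Sobol, and LHS samplers are active.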
================================================
FILE: transopt/optimizer/sampler/lhs.py
================================================
import numpy as np
from scipy.stats import qmc
from transopt.optimizer.sampler.sampler_base import Sampler
from transopt.agent.registry import sampler_registry
from transopt.space.search_space import SearchSpace
@sampler_registry.register("lhs")
class LatinHypercubeSampler(Sampler):
def sample(self, search_space:SearchSpace, metadata = None):
d = len(search_space.variables_order)
sampler = qmc.LatinHypercube(d=d)
sample_points = sampler.random(n=self.n_samples)
for i, name in enumerate(search_space.variables_order):
var_range = search_space.ranges[name]
if search_space.var_discrete[name]:
continuous_vals = qmc.scale(
sample_points[:, i][np.newaxis], var_range[0], var_range[1]
)
sample_points[:, i] = np.round(continuous_vals).astype(int)
            else:  # continuous variable: scale into its range
sample_points[:, i] = qmc.scale(sample_points[:, i][np.newaxis], var_range[0], var_range[1])
return sample_points
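`LatinHypercubeSampler` defers to `scipy.stats.qmc`; the stratification property it relies on (exactly one sample per equal-width bin along each axis) can be sketched with the standard library alone. The `latin_hypercube` function below is a simplified stand-in, not the scipy implementation:

```python
import random


def latin_hypercube(n, d, rng=None):
    """One sample in each of n equal bins per dimension, bins shuffled per axis."""
    rng = rng or random.Random(0)
    cols = []
    for _ in range(d):
        bins = list(range(n))
        rng.shuffle(bins)  # decouple the axes from one another
        # Jitter each sample uniformly inside its assigned bin [b/n, (b+1)/n).
        cols.append([(b + rng.random()) / n for b in bins])
    return [tuple(col[i] for col in cols) for i in range(n)]


pts = latin_hypercube(n=4, d=2)
# Each axis has exactly one point per quarter-interval [k/4, (k+1)/4).
for axis in range(2):
    print(sorted(int(p[axis] * 4) for p in pts))  # -> [0, 1, 2, 3]
```

This one-point-per-stratum guarantee is what distinguishes LHS from plain uniform random sampling, where bins can be left empty by chance.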
================================================
FILE: transopt/optimizer/sampler/lhs_BAK.py
================================================
import numpy as np
import scipy.stats.qmc as qmc
from scipy import spatial
from scipy import stats
from scipy import linalg
from numpy import ma
__all__ = ["lhs"]
def lhs(d, samples=None, criterion=None, iterations=5, correlation_matrix=None):
"""
Generate a latin-hypercube design
Parameters
----------
d : int
The number of factors to generate samples for
Optional
--------
samples : int
The number of samples to generate for each factor (Default: d)
criterion : str
Allowable values are "center" or "c", "maximin" or "m",
"centermaximin" or "cm", and "correlation" or "corr". If no value
given, the design is simply randomized.
iterations : int
The number of iterations in the maximin and correlations algorithms
(Default: 5).
correlation_matrix : ndarray
Enforce correlation between factors (only used in lhsmu)
Returns
-------
H : 2d-array
An n-by-samples design matrix that has been normalized so factor values
are uniformly spaced between zero and one.
"""
H = None
if samples is None:
samples = d
if criterion is None:
return _lhsclassic(d, samples)
criterion = criterion.lower()
    if criterion not in ("center", "c", "maximin", "m", "centermaximin", "cm", "correlation", "corr", "lhsmu"):
raise ValueError('Invalid value for "criterion": {}'.format(criterion))
if criterion in ("center", "c"):
H = _lhscentered(d, samples)
elif criterion in ("maximin", "m"):
H = _lhsmaximin(d, samples, iterations, "maximin")
elif criterion in ("centermaximin", "cm"):
H = _lhsmaximin(d, samples, iterations, "centermaximin")
elif criterion in ("correlation", "corr"):
H = _lhscorrelate(d, samples, iterations)
    elif criterion == "lhsmu":
# as specified by the paper. M is set to 5
H = _lhsmu(d, samples, correlation_matrix, M=5)
return H
def _lhsclassic(d, samples):
sampler = qmc.LatinHypercube(d=d)
return sampler.random(n=samples)
def _lhscentered(d, samples):
sampler = qmc.LatinHypercube(d=d)
H = sampler.random(n=samples)
H = (np.floor(H * samples) + 0.5) / samples
return H
def _lhsmaximin(d, samples, iterations, lhstype):
    maxdist = 0
    best_sample = None
    for i in range(iterations):
        sampler = qmc.LatinHypercube(d=d)
        Hcandidate = sampler.random(n=samples)
        if lhstype != "maximin":
            Hcandidate = (np.floor(Hcandidate * samples) + 0.5) / samples
        # Use a name distinct from the dimension argument `d`; reusing `d`
        # here would break qmc.LatinHypercube(d=d) on later iterations.
        pairwise = spatial.distance.pdist(Hcandidate, "euclidean")
        min_d = np.min(pairwise)
        if maxdist < min_d:
            maxdist = min_d
            best_sample = Hcandidate.copy()
    return best_sample if best_sample is not None else np.zeros((samples, d))
def _lhscorrelate(d, samples, iterations):
mincorr = np.inf
best_sample = None
for i in range(iterations):
sampler = qmc.LatinHypercube(d=d)
Hcandidate = sampler.random(n=samples)
R = np.corrcoef(Hcandidate.T)
max_corr = np.max(np.abs(R - np.eye(d)))
if max_corr < mincorr:
mincorr = max_corr
best_sample = Hcandidate.copy()
return best_sample
def _lhsmu(d, samples=None, corr=None, M=5):
if samples is None:
samples = d
I = M * samples
rdpoints = np.random.uniform(size=(I, d))
dist = spatial.distance.cdist(rdpoints, rdpoints, metric="euclidean")
D_ij = ma.masked_array(dist, mask=np.identity(I))
index_rm = np.zeros(I - samples, dtype=int)
i = 0
while i < I - samples:
order = ma.sort(D_ij, axis=1)
avg_dist = ma.mean(order[:, 0:2], axis=1)
min_l = ma.argmin(avg_dist)
D_ij[min_l, :] = ma.masked
D_ij[:, min_l] = ma.masked
index_rm[i] = min_l
i += 1
rdpoints = np.delete(rdpoints, index_rm, axis=0)
if corr is not None:
# check if covariance matrix is valid
assert type(corr) == np.ndarray
assert corr.ndim == 2
assert corr.shape[0] == corr.shape[1]
assert corr.shape[0] == d
norm_u = stats.norm().ppf(rdpoints)
L = linalg.cholesky(corr, lower=True)
norm_u = np.matmul(norm_u, L)
H = stats.norm().cdf(norm_u)
else:
H = np.zeros_like(rdpoints, dtype=float)
rank = np.argsort(rdpoints, axis=0)
for l in range(samples):
low = float(l) / samples
high = float(l + 1) / samples
l_pos = rank == l
H[l_pos] = np.random.uniform(low, high, size=d)
return H
if __name__ == "__main__":
"""
Example
-------
A 3-factor design (defaults to 3 samples)::
    >>> lhs(3)
array([[ 0.12484671, 0.95539205, 0.24399798],
[ 0.53288616, 0.38533955, 0.86703834],
[ 0.68602787, 0.31690477, 0.38533151]])
A 4-factor design with 6 samples::
>>> lhs(4, samples=6)
array([[ 0.06242335, 0.19266575, 0.88202411, 0.89439364],
[ 0.19266977, 0.53538985, 0.53030416, 0.49498498],
[ 0.71737371, 0.75412607, 0.17634727, 0.71520486],
[ 0.63874044, 0.85658231, 0.33676408, 0.31102936],
[ 0.43351917, 0.45134543, 0.12199899, 0.53056742],
[ 0.93530882, 0.15845238, 0.7386575 , 0.09977641]])
A 2-factor design with 5 centered samples::
    >>> lhs(2, samples=5, criterion='center')
array([[ 0.1, 0.9],
[ 0.5, 0.5],
[ 0.7, 0.1],
[ 0.3, 0.7],
[ 0.9, 0.3]])
A 3-factor design with 4 samples where the minimum distance between
all samples has been maximized::
>>> lhs(3, samples=4, criterion='maximin')
array([[ 0.69754389, 0.2997106 , 0.96250964],
[ 0.10585037, 0.09872038, 0.73157522],
[ 0.25351996, 0.65148999, 0.07337204],
[ 0.91276926, 0.97873992, 0.42783549]])
A 4-factor design with 5 samples where the samples are as uncorrelated
as possible (within 10 iterations)::
>>> lhs(4, samples=5, criterion='correlation', iterations=10)
array([[ 0.72088348, 0.05121366, 0.97609357, 0.92487081],
[ 0.49507404, 0.51265511, 0.00808672, 0.37915272],
[ 0.22217816, 0.2878673 , 0.24034384, 0.42786629],
[ 0.91977309, 0.93895699, 0.64061224, 0.14213258],
[ 0.04719698, 0.70796822, 0.53910322, 0.78857071]])
"""
h1 = lhs(4, samples=5)
print(h1)
sampler = qmc.LatinHypercube(d=4)
h2 = sampler.random(n=5)
print(h2)
d = 3
samples = 10
corr = np.array([[1.0, 0.5, 0.2],
[0.5, 1.0, 0.3],
[0.2, 0.3, 1.0]])
sampled_data = _lhsmu(d, samples, corr, M=5)
print("Generated samples with specified correlation:")
print(sampled_data)
================================================
FILE: transopt/optimizer/sampler/meta.py
================================================
================================================
FILE: transopt/optimizer/sampler/random.py
================================================
import numpy as np
from transopt.optimizer.sampler.sampler_base import Sampler
from transopt.agent.registry import sampler_registry
@sampler_registry.register("random")
class RandomSampler(Sampler):
def sample(self, search_space, metadata = None):
samples = np.zeros((self.n_samples, len(search_space.variables_order)))
for i, name in enumerate(search_space.variables_order):
var_range = search_space.ranges[name]
            if search_space.var_discrete[name]:  # discrete variable: draw integers
samples[:, i] = np.random.randint(
var_range[0], var_range[1] + 1, size=self.n_samples
)
else:
samples[:, i] = np.random.uniform(
var_range[0], var_range[1], size=self.n_samples
)
return samples
================================================
FILE: transopt/optimizer/sampler/sampler_base.py
================================================
class Sampler:
def __init__(self, n_samples, config) -> None:
self.config = config
self.n_samples = n_samples
def sample(self, search_space, metadata=None):
raise NotImplementedError("Sample method should be implemented by subclasses.")
def change_n_samples(self, n_samples):
self.n_samples = n_samples
def check_metadata_avaliable(self, metadata):
if metadata is None:
return False
return True
================================================
FILE: transopt/optimizer/sampler/sobel.py
================================================
import numpy as np
from scipy.stats import qmc
from transopt.optimizer.sampler.sampler_base import Sampler
from transopt.agent.registry import sampler_registry
@sampler_registry.register("sobol")
class SobolSampler(Sampler):
def sample(self, search_space, metadata = None):
d = len(search_space.variables_order)
sampler = qmc.Sobol(d=d, scramble=True)
sample_points = sampler.random(n=self.n_samples)
for i, name in enumerate(search_space.variables_order):
var_range = search_space.ranges[name]
            if search_space.var_discrete[name]:
                # Discrete variable: scale into its range, then round to integers.
                # qmc.scale expects a 2D sample, hence the added axis.
                continuous_vals = qmc.scale(
                    sample_points[:, i][np.newaxis], var_range[0], var_range[1]
                )
                sample_points[:, i] = np.round(continuous_vals).astype(int)
            else:
                sample_points[:, i] = qmc.scale(
                    sample_points[:, i][np.newaxis], var_range[0], var_range[1]
                )
return sample_points
================================================
FILE: transopt/optimizer/selector/__init__.py
================================================
from transopt.optimizer.selector.selector_base import SelectorBase
from transopt.optimizer.selector.lsh_selector import LSHSelector
from transopt.optimizer.selector.fuzzy_selector import FuzzySelector
================================================
FILE: transopt/optimizer/selector/fuzzy_selector.py
================================================
from transopt.agent.registry import selector_registry
from transopt.optimizer.selector.selector_base import SelectorBase
@selector_registry.register("Fuzzy")
class FuzzySelector(SelectorBase):
def __init__(self, config):
super(FuzzySelector, self).__init__(config)
def fetch_data(self, tasks_info):
task_name = tasks_info["additional_config"]["problem_name"]
variable_names = [var['name'] for var in tasks_info["variables"]]
dimensions = len(variable_names)
objectives = len(tasks_info["objectives"])
conditions = {
"task_name": task_name,
"dimensions": dimensions,
"objectives": objectives,
}
datasets_list = self.data_manager.db.search_tables_by_metadata(conditions)
metadata = {
dataset_name: self.data_manager.db.select_data(dataset_name)
for dataset_name in datasets_list
}
metadata_info = {
dataset_name: self.data_manager.db.query_dataset_info(dataset_name)
for dataset_name in datasets_list
}
return metadata, metadata_info
================================================
FILE: transopt/optimizer/selector/lsh_selector.py
================================================
from transopt.optimizer.selector.selector_base import SelectorBase
from transopt.agent.registry import selector_registry
@selector_registry.register('LSH')
class LSHSelector(SelectorBase):
def __init__(self, config):
super(LSHSelector, self).__init__(config)
def fetch_data(self, tasks_info):
task_name = tasks_info['additional_config']['problem_name']
variable_names = [var['name'] for var in tasks_info["variables"]]
num_variables = len(variable_names)
num_objectives = len(tasks_info["objectives"])
name_str = " ".join(variable_names)
datasets_list = self.data_manager.search_similar_datasets(task_name, {'variable_names':name_str, 'num_variables':num_variables, 'num_objectives':num_objectives})
metadata = {}
metadata_info = {}
for dataset_name in datasets_list:
metadata[dataset_name] = self.data_manager.db.select_data(dataset_name)
metadata_info[dataset_name] = self.data_manager.db.query_dataset_info(dataset_name)
return metadata, metadata_info
================================================
FILE: transopt/optimizer/selector/selector_base.py
================================================
from transopt.datamanager.manager import DataManager
from abc import ABC, abstractmethod
class SelectorBase(ABC):
def __init__(self, config):
self.data_manager = DataManager()
@abstractmethod
def fetch_data(self, tasks_info):
raise NotImplementedError
================================================
FILE: transopt/remote/__init__.py
================================================
from transopt.remote.experiment_tasks import celery_inst, ExperimentTaskHandler
from transopt.remote.experiment_server import ExperimentServer
from transopt.remote.experiment_client import ExperimentClient
================================================
FILE: transopt/remote/celeryconfig.py
================================================
## Broker settings.
broker_url = 'redis://localhost:6379/0'
broker_connection_retry_on_startup = True
## Using the database to store task state and results.
result_backend = 'redis://localhost:6379/0'
# If enabled the task will report its status as 'started'
# when the task is executed by a worker.
task_track_started = True
================================================
FILE: transopt/remote/experiment_client.py
================================================
import requests
import time
class ExperimentClient:
def __init__(self, server_url, timeout=10):
self.server_url = server_url
self.timeout = timeout
def _handle_response(self, response):
if response.status_code != 200:
raise Exception(
f"Server returned status code {response.status_code}: {response.text}"
)
return response.json()
def start_experiment(self, params):
try:
response = requests.post(
f"{self.server_url}/start_experiment", json=params, timeout=self.timeout
)
data = self._handle_response(response)
return data.get("task_id")
except requests.RequestException as e:
raise Exception(f"Failed to start experiment: {e}")
def get_experiment_result(self, task_id):
try:
response = requests.get(
f"{self.server_url}/get_experiment_result/{task_id}",
timeout=self.timeout,
)
return self._handle_response(response)
except requests.RequestException as e:
raise Exception(
f"Failed to get experiment result for task ID {task_id}: {e}"
)
def wait_for_result(self, task_id, poll_interval=2):
while True:
result = self.get_experiment_result(task_id)
if result["state"] == "SUCCESS":
return result["result"]
elif result["state"] == "FAILURE":
raise Exception(f"Experiment failed with status: {result['status']}")
else:
print(f"Experiment state: {result['state']}")
time.sleep(poll_interval)
if __name__ == "__main__":
client = ExperimentClient(server_url="http://192.168.3.49:5000")
params = {"param1": "value1", "param2": "value2"} # Example parameters
try:
task_id = client.start_experiment(params)
print(f"Experiment started with task ID: {task_id}")
result = client.wait_for_result(task_id)
print(f"Experiment result: {result}")
except Exception as e:
print(f"Error: {e}")
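`wait_for_result` above is a poll-until-terminal loop over Celery task states. The same pattern can be exercised standalone against a stubbed result source; `fetch` here is a stand-in for `get_experiment_result`, with no HTTP involved:

```python
import time


def wait_for_result(fetch, task_id, poll_interval=0.01):
    """Poll fetch(task_id) until the reported state is SUCCESS or FAILURE."""
    while True:
        result = fetch(task_id)
        if result["state"] == "SUCCESS":
            return result["result"]
        if result["state"] == "FAILURE":
            raise RuntimeError(result.get("status", "experiment failed"))
        time.sleep(poll_interval)  # back off between polls


# Stub that reports PENDING, then STARTED, then succeeds.
states = iter([
    {"state": "PENDING"},
    {"state": "STARTED"},
    {"state": "SUCCESS", "result": 42},
])
print(wait_for_result(lambda task_id: next(states), "task-1"))  # -> 42
```

A fixed `poll_interval` is the simplest choice; for long-running experiments an exponential backoff would reduce load on the server.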
================================================
FILE: transopt/remote/experiment_server.py
================================================
from flask import Flask, jsonify, request
from transopt.remote import celery_inst, ExperimentTaskHandler
class ExperimentServer:
def __init__(self, task_handler):
self.app = Flask(__name__)
self.task_handler = task_handler
self._setup_routes()
def _validate_params(self, params):
required_keys = ["benchmark", "id", "budget", "seed", "bench_params", "fitness_params"]
return all(key in params for key in required_keys)
def _setup_routes(self):
@self.app.route("/start_experiment", methods=["POST"])
def start_experiment():
params = request.json
if not self._validate_params(params):
return jsonify({"error": "Invalid parameters"}), 400
try:
task = self.task_handler.start_experiment(params)
return jsonify({"task_id": task.id}), 200
except Exception as e:
# TODO:
# - better error handling
return jsonify({"error": str(e)}), 500
        @self.app.route("/get_experiment_result/<task_id>", methods=["GET"])
def get_experiment_result(task_id):
task = celery_inst.AsyncResult(task_id)
if task.state == "PENDING":
response = {
"state": task.state,
"status": "Task is pending...",
}
elif task.state != "FAILURE":
response = {
"state": task.state,
"result": task.result,
}
else:
# task failed
response = {
"state": task.state,
"status": str(task.info), # this is the exception raised
}
return jsonify(response)
def run(self, host="0.0.0.0", port=5001):
self.app.run(host=host, port=port)
if __name__ == "__main__":
task_handler = ExperimentTaskHandler()
server = ExperimentServer(task_handler=task_handler)
server.run()
================================================
FILE: transopt/remote/experiment_tasks.py
================================================
from celery import Celery, Task
from celery.utils.log import get_task_logger
from transopt.agent.registry import problem_registry
celery_inst = Celery(__name__)
celery_inst.config_from_object("transopt.remote.celeryconfig")
logger = get_task_logger(__name__)
class DebugTask(Task):
def on_failure(self, exc, task_id, args, kwargs, einfo):
logger.warning(f"Task [{task_id}] failed: {exc}")
def on_success(self, retval, task_id, args, kwargs):
logger.warning(f"Task [{task_id}] succeeded with result: {retval}")
def after_return(self, status, retval, task_id, args, kwargs, einfo):
logger.warning(f"Task [{task_id}] finished with status: {status}")
class ExperimentTaskHandler:
def __init__(self):
pass
@celery_inst.task(bind=True, base=DebugTask)
def run_experiment(self, params):
# rdb.set_trace()
bench_name = params["benchmark"]
bench_id = params["id"]
budget = params["budget"]
seed = params["seed"]
bench_params = params["bench_params"]
fitness_params = params["fitness_params"]
benchmark_cls = problem_registry.get(bench_name)
if benchmark_cls is None:
self.update_state(state="FAILURE", meta={"status": "Benchmark not found!"})
raise ValueError(f"Benchmark {bench_name} not found!")
try:
problem = benchmark_cls(
task_name=f"{bench_name}_{bench_id}",
task_id=bench_id,
budget=budget,
seed=seed,
params=bench_params,
)
result = problem.f(**fitness_params)
return result
except Exception as e:
self.update_state(state="FAILURE", meta={"status": "Experiment failed!"})
raise e
def start_experiment(self, params):
return self.run_experiment.apply_async(args=[params])
if __name__ == "__main__":
# handler = ExperimentTaskHandler()
# params = {
# "benchmark": "sample_bench",
# "id": 1,
# "budget": 100,
# "seed": 42,
# "bench_params": {},
# "fitness_params": {}
# }
# handler.start_experiment(params)
pass
================================================
FILE: transopt/remote/server_manager.sh
================================================
#!/bin/bash
# Define SESSION_NAME
SESSION_NAME="experiment_server"
activate_conda_env() {
local env_name="$1"
local target_pane="$2"
if [ -n "$env_name" ]; then
tmux send-keys -t "$target_pane" "conda activate $env_name" C-m
fi
}
display_shortcuts() {
local target_pane="$1"
# ANSI escape codes for bold and colored text
local BOLD="\033[1m"
local COLOR_RED="\033[31m"
local RESET="\033[0m"
# Set display-time to 10 seconds (10000 milliseconds)
tmux set-option -t "$target_pane" display-time 10000
# Display the shortcuts using tmux's display-message
tmux display-message -t "$target_pane" "${BOLD}${COLOR_RED}SHORTCUTS: Ctrl-b n (next window), Ctrl-b p (previous window), Ctrl-b d (detach)${RESET}"
}
run_experiment_server() {
local env_name="$1"
# Start a new tmux session
tmux new-session -d -s "$SESSION_NAME"
# Activate conda environment (if specified) and run the Celery worker in the first window
display_shortcuts "$SESSION_NAME:0"
activate_conda_env "$env_name" "$SESSION_NAME:0"
    tmux send-keys -t "$SESSION_NAME:0" 'celery -A experiment_tasks.celery_inst worker --loglevel=info' C-m
# Run Flask in a new window
tmux new-window -t "$SESSION_NAME:1"
display_shortcuts "$SESSION_NAME:1"
activate_conda_env "$env_name" "$SESSION_NAME:1"
tmux send-keys -t "$SESSION_NAME:1" 'python experiment_server.py' C-m
# Attach to the 'experiment_server' session
tmux attach -t "$SESSION_NAME"
}
case "$1" in
start)
# Get the currently activated conda environment
CURRENT_CONDA_ENV=$(conda env list | grep '*' | awk '{print $1}')
if [ -z "$CURRENT_CONDA_ENV" ]; then
echo "No Conda environment is currently activated."
run_experiment_server ""
else
run_experiment_server "$CURRENT_CONDA_ENV"
fi
;;
attach)
tmux attach -t "$SESSION_NAME"
;;
stop)
tmux kill-session -t "$SESSION_NAME"
;;
*)
echo "Usage: $0 {start|attach|stop}"
exit 1
;;
esac
================================================
FILE: transopt/space/__init__.py
================================================
from .search_space import SearchSpace
from .variable import Continuous, Categorical, Integer, LogContinuous
================================================
FILE: transopt/space/fidelity_space.py
================================================
import copy
import numpy as np
import pandas as pd
class FidelitySpace:
def __init__(self, fidelity_variables):
self.ranges = {var.name: var for var in fidelity_variables}
@property
def fidelity_names(self):
return self.ranges.keys()
def get_fidelity_range(self):
return self.ranges
================================================
FILE: transopt/space/search_space.py
================================================
import copy
import numpy as np
import pandas as pd
class SearchSpace:
def __init__(self, variables):
self._variables = {var.name: var for var in variables}
self.variables_order = [var.name for var in variables]
        # Compute and store the original ranges and type information
self.original_ranges = {
name: var.search_space_range for name, var in self._variables.items()
}
self.var_discrete = {
name: var.is_discrete for name, var in self._variables.items()
}
self.ranges = copy.deepcopy(self.original_ranges)
def __getitem__(self, item):
return self._variables.get(item)
def __contains__(self, item):
return item in self.variables_order
def get_design_variables(self):
return self._variables
def get_design_variable(self, name):
return self._variables[name]
def get_hyperparameter_names(self):
return list(self._variables.keys())
def get_hyperparameter_types(self):
return {name:self._variables[name].type for name in self._variables}
def map_to_design_space(self, values: np.ndarray) -> dict:
"""
Maps the given values from the search space to the design space.
Args:
values (np.ndarray): The values to be mapped from the search space. Must be a 1D NumPy array.
Returns:
dict: A dictionary containing the mapped values in the design space.
Raises:
ValueError: If the `values` parameter is not a 1D NumPy array.
"""
values_dict = {}
for i, name in enumerate(self.variables_order):
variable = self._variables[name]
value = values[i]
values_dict[name] = variable.map2design(value)
return values_dict
def map_from_design_space(self, values_dict: dict) -> np.ndarray:
"""
Maps values from the design space to the search space.
Args:
values_dict (dict): A dictionary containing variable names as keys and their corresponding values.
Returns:
np.ndarray: An array of mapped values in the search space.
"""
values_array = np.zeros(len(self.variables_order))
for i, name in enumerate(self.variables_order):
variable = self._variables[name]
value = values_dict[name]
values_array[i] = variable.map2search(value)
return values_array
def update_range(self, name, new_range: tuple):
"""
Update the range of a variable in the search space.
Args:
name (str): The name of the variable.
new_range (tuple): The new range for the variable.
Raises:
ValueError: If the variable is not found in the search space or if the new range is out of the original range.
"""
if name in self._variables:
# Check if the new range is valid
ori_range = self.original_ranges[name]
if new_range[0] < ori_range[0] or new_range[1] > ori_range[1]:
raise ValueError(
f"New range {new_range} is out of the original range {ori_range}."
)
self.ranges[name] = new_range
else:
raise ValueError(f"Variable '{name}' not found in search space.")
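`map_to_design_space` and `map_from_design_space` are inverses applied variable-by-variable in `variables_order`. With a minimal stand-in variable (the real classes live in `transopt/space/variable.py`; `IntVar` below is illustrative), the round trip looks like:

```python
class IntVar:
    """Stand-in for the Integer variable: search values are floats,
    design values are ints."""

    def __init__(self, name):
        self.name = name

    def map2design(self, value):
        return int(round(value))

    def map2search(self, value):
        return float(round(value))


def to_design(variables, values):
    # Mirrors map_to_design_space: array -> {name: design value}
    return {v.name: v.map2design(x) for v, x in zip(variables, values)}


def from_design(variables, values_dict):
    # Mirrors map_from_design_space: {name: design value} -> array
    return [v.map2search(values_dict[v.name]) for v in variables]


variables = [IntVar("n_layers"), IntVar("batch")]
design = to_design(variables, [2.6, 31.2])
print(design)                         # -> {'n_layers': 3, 'batch': 31}
print(from_design(variables, design))  # -> [3.0, 31.0]
```

Optimizers thus work in a uniform continuous search space, while evaluation code always receives well-typed design-space values.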
================================================
FILE: transopt/space/variable.py
================================================
import math
import numpy as np
class Variable:
def __init__(self, name, type_):
self.name = name
self.type = type_
@property
def search_space_range(self):
raise NotImplementedError
def map2design(self, value):
# To design space
raise NotImplementedError
def map2search(self, value):
# To search space
raise NotImplementedError
class Continuous(Variable):
def __init__(self, name, range_):
super().__init__(name, "continuous")
self.range = range_
self.is_discrete = False
@property
def search_space_range(self):
return self.range
def map2design(self, value):
return float(value) # Ensure it remains a float
def map2search(self, value):
return value
class Categorical(Variable):
def __init__(self, name, categories):
super().__init__(name, "categorical")
self.categories = categories
self.range = (1, len(self.categories))
self.is_discrete = True
@property
def search_space_range(self):
return (1, len(self.categories))
def map2design(self, value):
return self.categories[round(value) - 1]
def map2search(self, value):
return self.categories.index(value) + 1
class Integer(Variable):
def __init__(self, name, range_):
super().__init__(name, "integer")
self.range = range_
self.is_discrete = True
@property
def search_space_range(self):
return self.range
def map2design(self, value):
# Ensure the mapped value is an integer
return int(round(value))
def map2search(self, value):
return round(value)
class LargeInteger(Variable):
def __init__(self, name, range_):
super().__init__(name, "large_integer")
self.range = range_
self.is_discrete = True
@property
def search_space_range(self):
# Convert large range to a manageable float range
lower = 0
upper = 1
return lower, upper
def map2design(self, value):
# Map float value [0, 1] to the large integer range
return min(int(self.range[0] + value * (self.range[1] - self.range[0])), self.range[1])
def map2search(self, value):
# Map large integer value to a float value in [0, 1]
return (value - self.range[0]) / (self.range[1] - self.range[0])
class ExponentialInteger(Variable):
def __init__(self, name, range_):
super().__init__(name, "exp2")
        # Snap the range to powers of two, i.e. [2^x, 2^y], with the upper
        # bound capped at 2 ** 63.
        lower_bound = 2 ** math.floor(math.log2(range_[0]))
        upper_bound = min(2 ** math.ceil(math.log2(range_[1])), 2 ** 63)
self.range = (lower_bound, upper_bound)
self.is_discrete = True
@property
def search_space_range(self):
lower = math.log2(self.range[0])
upper = math.log2(self.range[1])
return lower, upper
def map2design(self, value):
return int(2 ** value)
def map2search(self, value):
value = max(value, self.range[0]) # Ensure value is within valid range
return math.log2(value)
class LogContinuous(Variable):
def __init__(self, name, range_):
super().__init__(name, "log_continuous")
self.range = range_
self.is_discrete = False
@property
def search_space_range(self):
return self.range
def map2design(self, value):
return 10**value
def map2search(self, value):
return math.log10(value)
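The `Categorical` and `LogContinuous` mappings above can be exercised in isolation. Here is a small usage sketch of the same 1-based categorical encoding and log10 transform, reimplemented standalone so it runs without the package:

```python
import math


class Categorical:
    """1-based index encoding, mirroring the class above."""

    def __init__(self, categories):
        self.categories = categories

    def map2design(self, value):
        # Search-space value is a (possibly fractional) index starting at 1.
        return self.categories[round(value) - 1]

    def map2search(self, value):
        return self.categories.index(value) + 1


class LogContinuous:
    """Search space holds exponents; design space holds 10**exponent."""

    def map2design(self, value):
        return 10 ** value

    def map2search(self, value):
        return math.log10(value)


opt = Categorical(["adam", "sgd", "rmsprop"])
print(opt.map2design(2))          # -> sgd
print(opt.map2search("rmsprop"))  # -> 3

lr = LogContinuous()
print(lr.map2design(-3))          # -> 0.001
```

The log transform is what lets an optimizer search learning-rate-like parameters uniformly over exponents rather than over raw magnitudes.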
================================================
FILE: transopt/utils/Initialization.py
================================================
import random
import sobol_seq
import numpy as np
from sklearn.cluster import KMeans
def InitData(Init_method, KB, Init, Xdim, Dty, **kwargs):
    space_type, method = Init_method.split('_', 1)
    idxs = None
    if space_type == 'Continuous':
        if method == 'random':
            train_x = 2 * np.random.random(size=(Init, Xdim)) - 1
        elif method == 'uniform':
            # Low-discrepancy Sobol points, rescaled from [0, 1] to [-1, 1]
            train_x = 2 * sobol_seq.i4_sobol_generate(Xdim, Init) - 1
        elif method == 'fix':
            # Fixed 1-D designs; a fresh knowledge base gets a different set than a populated one
            if KB.len == 0:
                train_x = np.array([[-0.5], [-0.25], [0.5], [0.42]])
            else:
                train_x = np.array([[-0.1], [-0.8], [0.25], [0.4]])
        elif method == 'LFL':
            seed = kwargs['seed']
            quantile = kwargs['quantile']
            try:
                train_x = np.loadtxt(f'./Bench/Lifelone_env/randIni/ini_{Xdim}d_{Init}p_{seed}.txt')
                if len(train_x.shape) == 1:
                    train_x = train_x[:, np.newaxis]
            except OSError:
                # No cached design for this (dim, size, seed): generate one and cache it for reproducibility
                train_x = 2 * np.random.random(size=(Init, Xdim)) - 1
                np.savetxt(f'./Bench/Lifelone_env/randIni/ini_{Xdim}d_{Init}p_{seed}.txt', train_x)
            # Keep the first `quantile` fraction as anchor points and fill the rest with random samples
            anchor_point_num = int(quantile * Init)
            temp_x = train_x[:anchor_point_num]
            random_x = 2 * np.random.random(size=(100 * Xdim, Xdim)) - 1
            train_x = np.vstack((temp_x, random_x[-(Init - anchor_point_num):]))
    elif space_type == 'Tabular':
        if method == 'random':
            if 'Env' in kwargs:
                data_num = kwargs['Env'].get_dataset_size()
                rand_idxs = random.sample(range(0, data_num), Init)
                train_x = kwargs['Env'].get_var(rand_idxs)
                idxs = rand_idxs
# elif Method == 'grid':
# if KB.len == 0:
# if np.float64 == Dty:
# train_x = 2 * np.random.random(size=(Init, Xdim)) - 1
# else:
# print('Unsupport data type! shut down')
# return
# else:
# train_x = KB.local_optimal[0]
# for i in range(1, KB.len):
# train_x = np.vstack((train_x, KB.local_optimal[i]))
# train_x = np.unique(train_x, axis=0)
#
# if len(train_x) == Init:
# pass
# # train_x = np.array(train_x, dtype=Dty)
# elif len(train_x) > Init:
# result_x = []
# kmn = KMeans(n_clusters=int(Init), random_state=0)
# kmn.fit(train_x)
# lables = kmn.labels_
# centers = kmn.cluster_centers_
# for c_id,center in enumerate(centers):
# min_dis = 100
# min_dis_x_id = 0
# for x_id, x in enumerate(train_x):
# if lables[x_id] == c_id:
# dis = np.linalg.norm(x - center)
# if dis < min_dis:
# min_dis = dis
# min_dis_x_id = x_id
# result_x.append(train_x[min_dis_x_id])
#
# train_x = np.array(result_x)
# # train_x = np.concatenate(
# # (train_x, 2 * np.random.random(size=(Init - len(train_x), Xdim)) - 1))
# else:
# # train_x = np.array(train_x, dtype=Dty)
# train_x = np.concatenate(
# (train_x, 2 * np.random.random(size=(Init - len(train_x), Xdim)) - 1))
    else:
        raise ValueError(f"Unsupported initialization method: {Init_method}")
return train_x, idxs
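The `'Continuous_random'` branch above draws a uniform design in [-1, 1]^d. A small self-contained sketch of that initialization (using numpy's `Generator` API for seeding, rather than the module-level `np.random` used above):

```python
import numpy as np

def random_init(n, dim, seed=0):
    # Uniform initial design in [-1, 1]^dim, matching the 'Continuous_random' branch
    rng = np.random.default_rng(seed)
    return 2 * rng.random((n, dim)) - 1

X = random_init(5, 3)
print(X.shape)  # (5, 3)
```

Seeding through a `Generator` keeps initial designs reproducible per experiment, which is the same motivation as the cached design files in the `'LFL'` branch.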
================================================
FILE: transopt/utils/Kernel.py
================================================
import GPy
import numpy as np
def construct_multi_objective_kernel(input_dim, output_dim, base_kernel='RBF', Q=1, rank=2):
    # NOTE: only the RBF base kernel is currently supported; `base_kernel` is kept
    # for API compatibility and future extension.
    # Build Q independent base kernels (distinct objects, not Q references to one kernel,
    # so each component keeps its own hyperparameters).
    kernel_list = [GPy.kern.RBF(input_dim=input_dim) for _ in range(Q)]
    # Intrinsic Coregionalization Model (ICM): each base kernel is multiplied by a
    # coregionalization matrix B over the outputs; summing Q components yields an LMC kernel.
    K = kernel_list[0].prod(
        GPy.kern.Coregionalize(1, output_dim, active_dims=[input_dim], rank=rank,
                               W=None, kappa=None, name='B'),
        name='ICM0')
    for j, kernel in enumerate(kernel_list[1:], start=1):
        K += kernel.prod(
            GPy.kern.Coregionalize(1, output_dim, active_dims=[input_dim], rank=rank,
                                   W=None, kappa=None, name='B'),
            name='ICM%d' % j)
    return K
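For a single component (Q=1), the ICM construction above is equivalent to a Kronecker-structured covariance: the coregionalization matrix B = W W^T + diag(kappa) over outputs, times the base kernel over inputs. A minimal numpy sketch of that equivalence (not using GPy; function names here are illustrative):

```python
import numpy as np

def rbf_kernel(X, lengthscale=1.0, variance=1.0):
    # Squared-exponential covariance over the inputs
    d2 = np.sum((X[:, None, :] - X[None, :, :]) ** 2, axis=-1)
    return variance * np.exp(-0.5 * d2 / lengthscale ** 2)

def icm_kernel(X, W, kappa, lengthscale=1.0):
    # B = W W^T + diag(kappa) couples the outputs; the joint covariance over
    # (output, input) pairs is the Kronecker product of B with K(X, X).
    B = W @ W.T + np.diag(kappa)
    return np.kron(B, rbf_kernel(X, lengthscale))

X = np.linspace(0.0, 1.0, 5)[:, None]   # 5 one-dimensional inputs
W = np.array([[1.0], [0.5]])            # rank-1 coupling between 2 outputs
kappa = np.array([0.1, 0.1])            # per-output independent variance
K = icm_kernel(X, W, kappa)
print(K.shape)  # (10, 10): 2 outputs x 5 inputs
```

Because B and the RBF Gram matrix are both positive semi-definite, their Kronecker product is as well, so K is a valid multi-output covariance.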
================================================
FILE: transopt/utils/Normalization.py
================================================
import numpy as np
from typing import Union, Dict, List
from sklearn.preprocessing import power_transform
from agent.registry import normalizer_registry, normalizer_register
def get_normalizer(name):
    """Look up a normalizer by name in the registry."""
    normalizer = normalizer_registry.get(name)
    if normalizer is not None:
        return normalizer
    # Handle names that are not present in the registry
    raise NameError(f"Normalizer '{name}' not found in the registry.")
@normalizer_register('pt')
def normalize_with_power_transform(data: Union[np.ndarray, list], mean=None, std=None):
"""
Normalize the data using mean and standard deviation, followed by power transformation.
Parameters:
- data (Union[np.ndarray, list]): Input data to be normalized.
- mean (float, optional): Mean for normalization.
- std (float, optional): Std for normalization.
Returns:
- Union[np.ndarray, list]: Normalized and power transformed data.
"""
# Handle multiple data sets (list of ndarrays)
    if isinstance(data, list):
all_include = data[0]
data_len = [0, len(data[0])]
for Y in data[1:]:
all_include = np.concatenate((all_include, Y), axis=0)
data_len.append(len(all_include))
else: # Single data set
all_include = data
data_len = [0, len(data)]
# Calculate mean and std if not provided
if mean is None:
mean = np.mean(all_include)
if std is None:
std = np.std(all_include)
# Normalize and power transform
all_include = power_transform((all_include - mean) / std, method='yeo-johnson')
# Split back into multiple data sets if originally provided as a list
    if isinstance(data, list):
new_data = []
for i in range(len(data_len) - 1):
new_data.append(all_include[data_len[i]:data_len[i + 1]])
return new_data
# Return the transformed data
return all_include
def rank_normalize_with_power_transform(data: Union[np.ndarray, list]):
    """
    Replace the values of the data with their ranks, then standardize and apply
    a Yeo-Johnson power transform.
    Args:
    - data (Union[np.ndarray, list]): The input data, either as a single ndarray or as a list of ndarrays.
    Returns:
    - np.ndarray or list of np.ndarray: Transformed data.
    """
# Single ndarray input
if isinstance(data, np.ndarray):
# Replace the values in data with their corresponding ranks
sorted_indices = np.argsort(data, axis=0)[:, 0]
rank_array = np.zeros(shape=data.shape[0])
rank_array[sorted_indices] = np.arange(1, len(data) + 1)
# Apply standardization followed by power transformation
return power_transform(rank_array[:, np.newaxis], method='yeo-johnson')
# List of ndarrays input
elif isinstance(data, list):
new_data = []
all_include = data[0]
data_len = [0, len(data[0])]
# Combine all datasets in the list for subsequent processing
for Y in data[1:]:
all_include = np.concatenate((all_include, Y), axis=0)
data_len.append(len(all_include))
# Replace the values in combined data with their corresponding ranks
sorted_indices = np.argsort(all_include, axis=0)[:, 0]
rank_array = np.zeros(shape=all_include.shape[0])
rank_array[sorted_indices] = np.arange(1, len(all_include) + 1)
# Apply standardization followed by power transformation
all_include = power_transform((rank_array[:, np.newaxis]), method='yeo-johnson')
# Split the transformed data back into separate datasets based on the original list
for i in range(len(data_len) - 1):
new_data.append(all_include[data_len[i]:data_len[i + 1]])
return new_data
# Raise an error for unsupported input types
raise ValueError('Unsupported input type for normalization and power transform.')
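The rank-replacement step used above can be isolated into a few lines; a minimal numpy sketch with a hypothetical `to_ranks` helper:

```python
import numpy as np

def to_ranks(y):
    # Replace each value by its 1-based rank, as in the argsort-based step above
    ranks = np.empty(len(y))
    ranks[np.argsort(y)] = np.arange(1, len(y) + 1)
    return ranks

y = np.array([3.2, -1.0, 7.5, 0.0])
print(to_ranks(y))  # [3. 1. 4. 2.]
```

Rank normalization discards the scale of the objective values entirely, which makes the subsequent power transform robust to heavy-tailed objectives.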
@normalizer_register('norm')
def normalize(data:Union[List, Dict, np.ndarray], mean=None, std=None):
"""
Normalize the data using the given mean and standard deviation or compute them from the data if not provided.
Parameters:
- data (ndarray): The data to be normalized.
- mean (float, optional): If provided, use this mean for normalization. Otherwise, compute from the data.
- std (float, optional): If provided, use this standard deviation for normalization. Otherwise, compute from the data.
Returns:
- ndarray: Normalized data.
"""
# Compute mean and std from data if not provided
if isinstance(data, np.ndarray):
if mean is None:
mean = np.mean(data)
if std is None:
std = np.std(data)
return (data - mean) / std
    elif isinstance(data, list):
        tmp = []
        for d in data:
            # Fall back to per-array statistics when mean/std are not supplied,
            # so each dataset is standardized independently
            d_mean = np.mean(d) if mean is None else mean
            d_std = np.std(d) if std is None else std
            tmp.append((d - d_mean) / d_std)
        return tmp
else:
raise TypeError("Input data must be a numpy array or a list of numpy arrays.")
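The scalar standardization that `normalize` applies per array can be sketched in isolation; a minimal version with a hypothetical `zscore` helper that computes the statistics from the array when none are supplied:

```python
import numpy as np

def zscore(d, mean=None, std=None):
    # Standardize with supplied statistics, else compute them from the array itself
    mean = np.mean(d) if mean is None else mean
    std = np.std(d) if std is None else std
    return (d - mean) / std

y = np.array([1.0, 2.0, 3.0])
z = zscore(y)
```

After standardization the array has zero mean and unit (population) standard deviation, which is the property surrogate models such as GPs expect of their targets.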
================================================
FILE: transopt/utils/Prior.py
================================================
# Copyright (c) 2012 - 2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import warnings
import weakref
import numpy as np
from scipy.special import gammaln, digamma
from GPy.util.linalg import pdinv
from paramz.domains import _REAL, _POSITIVE, _NEGATIVE
class Prior(object):
domain = None
_instance = None
def __new__(cls, *args, **kwargs):
if not cls._instance or cls._instance.__class__ is not cls:
newfunc = super(Prior, cls).__new__
if newfunc is object.__new__:
cls._instance = newfunc(cls)
else:
cls._instance = newfunc(cls, *args, **kwargs)
return cls._instance
def pdf(self, x):
return np.exp(self.lnpdf(x))
def plot(self):
import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from GPy.plotting.matplot_dep import priors_plots
priors_plots.univariate_plot(self)
def __repr__(self, *args, **kwargs):
return self.__str__()
class Gaussian(Prior):
"""
Implementation of the univariate Gaussian probability function, coupled with random variables.
:param mu: mean
:param sigma: standard deviation
.. Note:: Bishop 2006 notation is used throughout the code
"""
domain = _REAL
_instances = []
def __new__(cls, mu=0, sigma=1): # Singleton:
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
if instance().mu == mu and instance().sigma == sigma:
return instance()
newfunc = super(Prior, cls).__new__
if newfunc is object.__new__:
o = newfunc(cls)
else:
o = newfunc(cls, mu, sigma)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, mu, sigma):
self.mu = float(mu)
self.sigma = float(sigma)
self.sigma2 = np.square(self.sigma)
self.constant = -0.5 * np.log(2 * np.pi * self.sigma2)
def __str__(self):
return "N({:.2g}, {:.2g})".format(self.mu, self.sigma)
def lnpdf(self, x):
return self.constant - 0.5 * np.square(x - self.mu) / self.sigma2
def lnpdf_grad(self, x):
return -(x - self.mu) / self.sigma2
def rvs(self, n):
return np.random.randn(n) * self.sigma + self.mu
def getstate(self):
return self.mu, self.sigma
def setstate(self, state):
self.mu = state[0]
self.sigma = state[1]
self.sigma2 = np.square(self.sigma)
self.constant = -0.5 * np.log(2 * np.pi * self.sigma2)
class Uniform(Prior):
_instances = []
def __new__(cls, lower=0, upper=1): # Singleton:
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
if instance().lower == lower and instance().upper == upper:
return instance()
newfunc = super(Prior, cls).__new__
if newfunc is object.__new__:
o = newfunc(cls)
else:
o = newfunc(cls, lower, upper)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, lower, upper):
self.lower = float(lower)
self.upper = float(upper)
assert self.lower < self.upper, "Lower needs to be strictly smaller than upper."
if self.lower >= 0:
self.domain = _POSITIVE
elif self.upper <= 0:
self.domain = _NEGATIVE
else:
self.domain = _REAL
def __str__(self):
return "[{:.2g}, {:.2g}]".format(self.lower, self.upper)
    def lnpdf(self, x):
        # Note: returns an in-range indicator (1 inside the support, 0 outside)
        # rather than the true log-density, mirroring the original GPy implementation.
        region = (x >= self.lower) * (x <= self.upper)
        return region
def lnpdf_grad(self, x):
return np.zeros(x.shape)
def rvs(self, n):
return np.random.uniform(self.lower, self.upper, size=n)
# def __getstate__(self):
# return self.lower, self.upper
#
# def __setstate__(self, state):
# self.lower = state[0]
# self.upper = state[1]
class LogGaussian(Gaussian):
"""
Implementation of the univariate *log*-Gaussian probability function, coupled with random variables.
:param mu: mean
:param sigma: standard deviation
.. Note:: Bishop 2006 notation is used throughout the code
"""
domain = _POSITIVE
_instances = []
def __new__(cls, mu=0, sigma=1, name=''): # Singleton:
# if cls._instances:
# cls._instances[:] = [instance for instance in cls._instances if instance()]
# for instance in cls._instances:
# if instance().mu == mu and instance().sigma == sigma:
# return instance()
newfunc = super(Prior, cls).__new__
if newfunc is object.__new__:
o = newfunc(cls)
else:
o = newfunc(cls, mu, sigma)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, mu, sigma, name):
self.mu = float(mu)
self.sigma = float(sigma)
self.sigma2 = np.square(self.sigma)
self.constant = -0.5 * np.log(2 * np.pi * self.sigma2)
self.name = name
def __str__(self):
return "lnN({:.2g}, {:.2g})".format(self.mu, self.sigma)
def lnpdf(self, x):
return self.constant - 0.5 * np.square(np.log(x) - self.mu) / self.sigma2 - np.log(x)
def lnpdf_grad(self, x):
return -((np.log(x) - self.mu) / self.sigma2 + 1.) / x
def rvs(self, n):
return np.exp(np.random.randn(int(n)) * self.sigma + self.mu)
def getstate(self):
return self.mu, self.sigma
def setstate(self, state):
self.mu = state[0]
self.sigma = state[1]
self.sigma2 = np.square(self.sigma)
self.constant = -0.5 * np.log(2 * np.pi * self.sigma2)
class MultivariateGaussian(Prior):
"""
Implementation of the multivariate Gaussian probability function, coupled with random variables.
:param mu: mean (N-dimensional array)
:param var: covariance matrix (NxN)
.. Note:: Bishop 2006 notation is used throughout the code
"""
domain = _REAL
_instances = []
def __new__(cls, mu=0, var=1): # Singleton:
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if
instance()]
for instance in cls._instances:
if np.all(instance().mu == mu) and np.all(
instance().var == var):
return instance()
newfunc = super(Prior, cls).__new__
if newfunc is object.__new__:
o = newfunc(cls)
else:
o = newfunc(cls, mu, var)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, mu, var):
self.mu = np.array(mu).flatten()
self.var = np.array(var)
assert len(self.var.shape) == 2, 'Covariance must be a matrix'
assert self.var.shape[0] == self.var.shape[1], \
'Covariance must be a square matrix'
assert self.var.shape[0] == self.mu.size
self.input_dim = self.mu.size
self.inv, _, self.hld, _ = pdinv(self.var)
self.constant = -0.5 * (self.input_dim * np.log(2 * np.pi) + self.hld)
def __str__(self):
return 'MultiN(' + str(self.mu) + ', ' + str(np.diag(self.var)) + ')'
def summary(self):
raise NotImplementedError
def pdf(self, x):
x = np.array(x).flatten()
return np.exp(self.lnpdf(x))
def lnpdf(self, x):
x = np.array(x).flatten()
d = x - self.mu
return self.constant - 0.5 * np.dot(d.T, np.dot(self.inv, d))
def lnpdf_grad(self, x):
x = np.array(x).flatten()
d = x - self.mu
return - np.dot(self.inv, d)
def rvs(self, n):
return np.random.multivariate_normal(self.mu, self.var, n)
def plot(self):
import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from GPy.plotting.matplot_dep import priors_plots
priors_plots.multivariate_plot(self)
def __getstate__(self):
return self.mu, self.var
def __setstate__(self, state):
self.mu = np.array(state[0]).flatten()
self.var = state[1]
assert len(self.var.shape) == 2, 'Covariance must be a matrix'
assert self.var.shape[0] == self.var.shape[1], \
'Covariance must be a square matrix'
assert self.var.shape[0] == self.mu.size
self.input_dim = self.mu.size
self.inv, _, self.hld, _ = pdinv(self.var)
self.constant = -0.5 * (self.input_dim * np.log(2 * np.pi) + self.hld)
def gamma_from_EV(E, V):
warnings.warn("use Gamma.from_EV to create Gamma Prior", FutureWarning)
return Gamma.from_EV(E, V)
class Gamma(Prior):
"""
Implementation of the Gamma probability function, coupled with random variables.
:param a: shape parameter
:param b: rate parameter (warning: it's the *inverse* of the scale)
.. Note:: Bishop 2006 notation is used throughout the code
"""
domain = _POSITIVE
_instances = []
def __new__(cls, a=1, b=.5, name = ''): # Singleton:
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
if instance().a == a and instance().b == b and instance().name == name:
return instance()
newfunc = super(Prior, cls).__new__
if newfunc is object.__new__:
o = newfunc(cls)
else:
o = newfunc(cls, a, b)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
@property
def a(self):
return self._a
@property
def b(self):
return self._b
def __init__(self, a, b, name=''):
self._a = float(a)
self._b = float(b)
self.name = name
self.constant = -gammaln(self.a) + a * np.log(b)
def __str__(self):
return "Ga({:.2g}, {:.2g})".format(self.a, self.b)
def summary(self):
ret = {"E[x]": self.a / self.b, \
"E[ln x]": digamma(self.a) - np.log(self.b), \
"var[x]": self.a / self.b / self.b, \
"Entropy": gammaln(self.a) - (self.a - 1.) * digamma(self.a) - np.log(self.b) + self.a}
        if self.a > 1:
            ret['mode'] = (self.a - 1.) / self.b
        else:
            ret['mode'] = np.nan
return ret
def lnpdf(self, x):
return self.constant + (self.a - 1) * np.log(x) - self.b * x
def lnpdf_grad(self, x):
return (self.a - 1.) / x - self.b
def rvs(self, n):
return np.random.gamma(scale=1. / self.b, shape=self.a, size=n)
def getstate(self):
return self.a, self.b
def update(self, value):
self._a += 1
self._b += value
@staticmethod
def from_EV(E, V):
"""
Creates an instance of a Gamma Prior by specifying the Expected value(s)
and Variance(s) of the distribution.
:param E: expected value
:param V: variance
"""
a = np.square(E) / V
b = E / V
return Gamma(a, b)
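`from_EV` solves the moment-matching equations E = a/b and V = a/b^2 for the shape a and rate b, giving a = E^2/V and b = E/V. A quick check of that algebra, with a standalone helper mirroring the static method:

```python
def gamma_from_EV(E, V):
    # Moment matching for Gamma(shape=a, rate=b): E = a/b, V = a/b^2
    # => a = E^2 / V, b = E / V (same algebra as Gamma.from_EV above)
    return E ** 2 / V, E / V

a, b = gamma_from_EV(2.0, 0.5)
print(a / b, a / b ** 2)  # 2.0 0.5
```

Recovering the requested mean and variance confirms the parameterization is rate-based, matching the docstring's warning that b is the inverse of the scale.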
def __getstate__(self):
return self.a, self.b
def __setstate__(self, state):
self._a = state[0]
self._b = state[1]
self.constant = -gammaln(self.a) + self.a * np.log(self.b)
class InverseGamma(Gamma):
"""
Implementation of the inverse-Gamma probability function, coupled with random variables.
:param a: shape parameter
:param b: rate parameter (warning: it's the *inverse* of the scale)
.. Note:: Bishop 2006 notation is used throughout the code
"""
domain = _POSITIVE
_instances = []
def __str__(self):
return "iGa({:.2g}, {:.2g})".format(self.a, self.b)
def summary(self):
return {}
@staticmethod
def from_EV(E, V):
raise NotImplementedError
def lnpdf(self, x):
return self.constant - (self.a + 1) * np.log(x) - self.b / x
def lnpdf_grad(self, x):
return -(self.a + 1.) / x + self.b / x ** 2
def rvs(self, n):
return 1. / np.random.gamma(scale=1. / self.b, shape=self.a, size=n)
class DGPLVM_KFDA(Prior):
"""
Implementation of the Discriminative Gaussian Process Latent Variable function using
Kernel Fisher Discriminant Analysis by Seung-Jean Kim for implementing Face paper
by Chaochao Lu.
:param lambdaa: constant
:param sigma2: constant
.. Note:: Surpassing Human-Level Face paper dgplvm implementation
"""
domain = _REAL
# _instances = []
# def __new__(cls, lambdaa, sigma2): # Singleton:
# if cls._instances:
# cls._instances[:] = [instance for instance in cls._instances if instance()]
# for instance in cls._instances:
# if instance().mu == mu and instance().sigma == sigma:
# return instance()
# o = super(Prior, cls).__new__(cls, mu, sigma)
# cls._instances.append(weakref.ref(o))
# return cls._instances[-1]()
def __init__(self, lambdaa, sigma2, lbl, kern, x_shape):
"""A description for init"""
self.datanum = lbl.shape[0]
self.classnum = lbl.shape[1]
self.lambdaa = lambdaa
self.sigma2 = sigma2
self.lbl = lbl
self.kern = kern
lst_ni = self.compute_lst_ni()
self.a = self.compute_a(lst_ni)
self.A = self.compute_A(lst_ni)
self.x_shape = x_shape
def get_class_label(self, y):
for idx, v in enumerate(y):
if v == 1:
return idx
return -1
# This function assigns each data point to its own class
# and returns the dictionary which contains the class name and parameters.
def compute_cls(self, x):
cls = {}
# Appending each data point to its proper class
for j in range(self.datanum):
class_label = self.get_class_label(self.lbl[j])
if class_label not in cls:
cls[class_label] = []
cls[class_label].append(x[j])
if len(cls) > 2:
for i in range(2, self.classnum):
del cls[i]
return cls
def x_reduced(self, cls):
x1 = cls[0]
x2 = cls[1]
x = np.concatenate((x1, x2), axis=0)
return x
def compute_lst_ni(self):
lst_ni = []
lst_ni1 = []
lst_ni2 = []
f1 = (np.where(self.lbl[:, 0] == 1)[0])
f2 = (np.where(self.lbl[:, 1] == 1)[0])
for idx in f1:
lst_ni1.append(idx)
for idx in f2:
lst_ni2.append(idx)
lst_ni.append(len(lst_ni1))
lst_ni.append(len(lst_ni2))
return lst_ni
def compute_a(self, lst_ni):
a = np.ones((self.datanum, 1))
count = 0
for N_i in lst_ni:
if N_i == lst_ni[0]:
a[count:count + N_i] = (float(1) / N_i) * a[count]
count += N_i
else:
if N_i == lst_ni[1]:
a[count: count + N_i] = -(float(1) / N_i) * a[count]
count += N_i
return a
def compute_A(self, lst_ni):
A = np.zeros((self.datanum, self.datanum))
idx = 0
for N_i in lst_ni:
B = float(1) / np.sqrt(N_i) * (np.eye(N_i) - ((float(1) / N_i) * np.ones((N_i, N_i))))
A[idx:idx + N_i, idx:idx + N_i] = B
idx += N_i
return A
# Here log function
def lnpdf(self, x):
x = x.reshape(self.x_shape)
K = self.kern.K(x)
a_trans = np.transpose(self.a)
paran = self.lambdaa * np.eye(x.shape[0]) + self.A.dot(K).dot(self.A)
inv_part = pdinv(paran)[0]
J = a_trans.dot(K).dot(self.a) - a_trans.dot(K).dot(self.A).dot(inv_part).dot(self.A).dot(K).dot(self.a)
J_star = (1. / self.lambdaa) * J
return (-1. / self.sigma2) * J_star
# Here gradient function
def lnpdf_grad(self, x):
x = x.reshape(self.x_shape)
K = self.kern.K(x)
paran = self.lambdaa * np.eye(x.shape[0]) + self.A.dot(K).dot(self.A)
inv_part = pdinv(paran)[0]
b = self.A.dot(inv_part).dot(self.A).dot(K).dot(self.a)
a_Minus_b = self.a - b
a_b_trans = np.transpose(a_Minus_b)
DJ_star_DK = (1. / self.lambdaa) * (a_Minus_b.dot(a_b_trans))
DJ_star_DX = self.kern.gradients_X(DJ_star_DK, x)
return (-1. / self.sigma2) * DJ_star_DX
def rvs(self, n):
return np.random.rand(n) # A WRONG implementation
def __str__(self):
return 'DGPLVM_prior'
    def __getstate__(self):
        return self.lbl, self.lambdaa, self.sigma2, self.kern, self.x_shape
    def __setstate__(self, state):
        lbl, lambdaa, sigma2, kern, x_shape = state
self.datanum = lbl.shape[0]
self.classnum = lbl.shape[1]
self.lambdaa = lambdaa
self.sigma2 = sigma2
self.lbl = lbl
self.kern = kern
lst_ni = self.compute_lst_ni()
self.a = self.compute_a(lst_ni)
self.A = self.compute_A(lst_ni)
self.x_shape = x_shape
class DGPLVM(Prior):
"""
Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel.
:param sigma2: constant
.. Note:: DGPLVM for Classification paper implementation
"""
domain = _REAL
    def __new__(cls, sigma2, lbl, x_shape):
        # object.__new__ accepts no extra arguments in Python 3
        return super(Prior, cls).__new__(cls)
def __init__(self, sigma2, lbl, x_shape):
self.sigma2 = sigma2
# self.x = x
self.lbl = lbl
self.classnum = lbl.shape[1]
self.datanum = lbl.shape[0]
self.x_shape = x_shape
self.dim = x_shape[1]
def get_class_label(self, y):
for idx, v in enumerate(y):
if v == 1:
return idx
return -1
# This function assigns each data point to its own class
# and returns the dictionary which contains the class name and parameters.
def compute_cls(self, x):
cls = {}
# Appending each data point to its proper class
for j in range(self.datanum):
class_label = self.get_class_label(self.lbl[j])
if class_label not in cls:
cls[class_label] = []
cls[class_label].append(x[j])
return cls
# This function computes mean of each class. The mean is calculated through each dimension
def compute_Mi(self, cls):
M_i = np.zeros((self.classnum, self.dim))
for i in cls:
# Mean of each class
class_i = cls[i]
M_i[i] = np.mean(class_i, axis=0)
return M_i
# Adding data points as tuple to the dictionary so that we can access indices
def compute_indices(self, x):
data_idx = {}
for j in range(self.datanum):
class_label = self.get_class_label(self.lbl[j])
if class_label not in data_idx:
data_idx[class_label] = []
t = (j, x[j])
data_idx[class_label].append(t)
return data_idx
# Adding indices to the list so we can access whole the indices
def compute_listIndices(self, data_idx):
lst_idx = []
lst_idx_all = []
for i in data_idx:
            if len(lst_idx) != 0:
                lst_idx = []  # start a fresh index list for each class after the first
# Here we put indices of each class in to the list called lst_idx_all
for m in range(len(data_idx[i])):
lst_idx.append(data_idx[i][m][0])
lst_idx_all.append(lst_idx)
return lst_idx_all
# This function calculates between classes variances
def compute_Sb(self, cls, M_i, M_0):
Sb = np.zeros((self.dim, self.dim))
for i in cls:
B = (M_i[i] - M_0).reshape(self.dim, 1)
B_trans = B.transpose()
Sb += (float(len(cls[i])) / self.datanum) * B.dot(B_trans)
return Sb
# This function calculates within classes variances
def compute_Sw(self, cls, M_i):
Sw = np.zeros((self.dim, self.dim))
for i in cls:
N_i = float(len(cls[i]))
W_WT = np.zeros((self.dim, self.dim))
for xk in cls[i]:
W = (xk - M_i[i])
W_WT += np.outer(W, W)
Sw += (N_i / self.datanum) * ((1. / N_i) * W_WT)
return Sw
# Calculating beta and Bi for Sb
def compute_sig_beta_Bi(self, data_idx, M_i, M_0, lst_idx_all):
# import pdb
# pdb.set_trace()
B_i = np.zeros((self.classnum, self.dim))
Sig_beta_B_i_all = np.zeros((self.datanum, self.dim))
for i in data_idx:
# pdb.set_trace()
# Calculating Bi
B_i[i] = (M_i[i] - M_0).reshape(1, self.dim)
for k in range(self.datanum):
for i in data_idx:
N_i = float(len(data_idx[i]))
if k in lst_idx_all[i]:
beta = (float(1) / N_i) - (float(1) / self.datanum)
Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
else:
beta = -(float(1) / self.datanum)
Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
Sig_beta_B_i_all = Sig_beta_B_i_all.transpose()
return Sig_beta_B_i_all
# Calculating W_j s separately so we can access all the W_j s anytime
def compute_wj(self, data_idx, M_i):
W_i = np.zeros((self.datanum, self.dim))
for i in data_idx:
N_i = float(len(data_idx[i]))
for tpl in data_idx[i]:
xj = tpl[1]
j = tpl[0]
W_i[j] = (xj - M_i[i])
return W_i
# Calculating alpha and Wj for Sw
def compute_sig_alpha_W(self, data_idx, lst_idx_all, W_i):
Sig_alpha_W_i = np.zeros((self.datanum, self.dim))
for i in data_idx:
N_i = float(len(data_idx[i]))
for tpl in data_idx[i]:
k = tpl[0]
for j in lst_idx_all[i]:
if k == j:
alpha = 1 - (float(1) / N_i)
Sig_alpha_W_i[k] += (alpha * W_i[j])
else:
alpha = 0 - (float(1) / N_i)
Sig_alpha_W_i[k] += (alpha * W_i[j])
Sig_alpha_W_i = (1. / self.datanum) * np.transpose(Sig_alpha_W_i)
return Sig_alpha_W_i
# This function calculates log of our prior
def lnpdf(self, x):
x = x.reshape(self.x_shape)
cls = self.compute_cls(x)
M_0 = np.mean(x, axis=0)
M_i = self.compute_Mi(cls)
Sb = self.compute_Sb(cls, M_i, M_0)
Sw = self.compute_Sw(cls, M_i)
# sb_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw))
# This function calculates derivative of the log of prior function
def lnpdf_grad(self, x):
x = x.reshape(self.x_shape)
cls = self.compute_cls(x)
M_0 = np.mean(x, axis=0)
M_i = self.compute_Mi(cls)
Sb = self.compute_Sb(cls, M_i, M_0)
Sw = self.compute_Sw(cls, M_i)
data_idx = self.compute_indices(x)
lst_idx_all = self.compute_listIndices(data_idx)
Sig_beta_B_i_all = self.compute_sig_beta_Bi(data_idx, M_i, M_0, lst_idx_all)
W_i = self.compute_wj(data_idx, M_i)
Sig_alpha_W_i = self.compute_sig_alpha_W(data_idx, lst_idx_all, W_i)
# Calculating inverse of Sb and its transpose and minus
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.1)[0]
Sb_inv_N_trans = np.transpose(Sb_inv_N)
Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
Sw_trans = np.transpose(Sw)
# Calculating DJ/DXk
DJ_Dxk = 2 * (
Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + Sb_inv_N_trans.dot(
Sig_alpha_W_i))
# Calculating derivative of the log of the prior
DPx_Dx = ((-1 / self.sigma2) * DJ_Dxk)
return DPx_Dx.T
# def frb(self, x):
# from functools import partial
# from GPy.models import GradientChecker
# f = partial(self.lnpdf)
# df = partial(self.lnpdf_grad)
# grad = GradientChecker(f, df, x, 'X')
# grad.checkgrad(verbose=1)
def rvs(self, n):
return np.random.rand(n) # A WRONG implementation
def __str__(self):
return 'DGPLVM_prior_Raq'
# ******************************************
from GPy.core import Parameterized
from GPy.core import Param
class DGPLVM_Lamda(Prior, Parameterized):
"""
Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel.
:param sigma2: constant
.. Note:: DGPLVM for Classification paper implementation
"""
domain = _REAL
# _instances = []
# def __new__(cls, mu, sigma): # Singleton:
# if cls._instances:
# cls._instances[:] = [instance for instance in cls._instances if instance()]
# for instance in cls._instances:
# if instance().mu == mu and instance().sigma == sigma:
# return instance()
# o = super(Prior, cls).__new__(cls, mu, sigma)
# cls._instances.append(weakref.ref(o))
# return cls._instances[-1]()
def __init__(self, sigma2, lbl, x_shape, lamda, name='DP_prior'):
super(DGPLVM_Lamda, self).__init__(name=name)
self.sigma2 = sigma2
# self.x = x
self.lbl = lbl
self.lamda = lamda
self.classnum = lbl.shape[1]
self.datanum = lbl.shape[0]
self.x_shape = x_shape
self.dim = x_shape[1]
self.lamda = Param('lamda', np.diag(lamda))
self.link_parameter(self.lamda)
def get_class_label(self, y):
for idx, v in enumerate(y):
if v == 1:
return idx
return -1
# This function assigns each data point to its own class
# and returns the dictionary which contains the class name and parameters.
def compute_cls(self, x):
cls = {}
# Appending each data point to its proper class
for j in range(self.datanum):
class_label = self.get_class_label(self.lbl[j])
if class_label not in cls:
cls[class_label] = []
cls[class_label].append(x[j])
return cls
# This function computes mean of each class. The mean is calculated through each dimension
def compute_Mi(self, cls):
M_i = np.zeros((self.classnum, self.dim))
for i in cls:
# Mean of each class
class_i = cls[i]
M_i[i] = np.mean(class_i, axis=0)
return M_i
# Adding data points as tuple to the dictionary so that we can access indices
def compute_indices(self, x):
data_idx = {}
for j in range(self.datanum):
class_label = self.get_class_label(self.lbl[j])
if class_label not in data_idx:
data_idx[class_label] = []
t = (j, x[j])
data_idx[class_label].append(t)
return data_idx
# Adding indices to the list so we can access whole the indices
def compute_listIndices(self, data_idx):
lst_idx = []
lst_idx_all = []
for i in data_idx:
            if len(lst_idx) != 0:
                lst_idx = []  # start a fresh index list for each class after the first
# Here we put indices of each class in to the list called lst_idx_all
for m in range(len(data_idx[i])):
lst_idx.append(data_idx[i][m][0])
lst_idx_all.append(lst_idx)
return lst_idx_all
# This function calculates between classes variances
def compute_Sb(self, cls, M_i, M_0):
Sb = np.zeros((self.dim, self.dim))
for i in cls:
B = (M_i[i] - M_0).reshape(self.dim, 1)
B_trans = B.transpose()
Sb += (float(len(cls[i])) / self.datanum) * B.dot(B_trans)
return Sb
# This function calculates within classes variances
def compute_Sw(self, cls, M_i):
Sw = np.zeros((self.dim, self.dim))
for i in cls:
N_i = float(len(cls[i]))
W_WT = np.zeros((self.dim, self.dim))
for xk in cls[i]:
W = (xk - M_i[i])
W_WT += np.outer(W, W)
Sw += (N_i / self.datanum) * ((1. / N_i) * W_WT)
return Sw
# Calculating beta and Bi for Sb
def compute_sig_beta_Bi(self, data_idx, M_i, M_0, lst_idx_all):
# import pdb
# pdb.set_trace()
B_i = np.zeros((self.classnum, self.dim))
Sig_beta_B_i_all = np.zeros((self.datanum, self.dim))
for i in data_idx:
# pdb.set_trace()
# Calculating Bi
B_i[i] = (M_i[i] - M_0).reshape(1, self.dim)
for k in range(self.datanum):
for i in data_idx:
N_i = float(len(data_idx[i]))
if k in lst_idx_all[i]:
beta = (float(1) / N_i) - (float(1) / self.datanum)
Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
else:
beta = -(float(1) / self.datanum)
Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
Sig_beta_B_i_all = Sig_beta_B_i_all.transpose()
return Sig_beta_B_i_all
# Calculating W_j s separately so we can access all the W_j s anytime
def compute_wj(self, data_idx, M_i):
W_i = np.zeros((self.datanum, self.dim))
for i in data_idx:
N_i = float(len(data_idx[i]))
for tpl in data_idx[i]:
xj = tpl[1]
j = tpl[0]
W_i[j] = (xj - M_i[i])
return W_i
# Calculating alpha and Wj for Sw
def compute_sig_alpha_W(self, data_idx, lst_idx_all, W_i):
Sig_alpha_W_i = np.zeros((self.datanum, self.dim))
for i in data_idx:
N_i = float(len(data_idx[i]))
for tpl in data_idx[i]:
k = tpl[0]
for j in lst_idx_all[i]:
if k == j:
alpha = 1 - (float(1) / N_i)
Sig_alpha_W_i[k] += (alpha * W_i[j])
else:
alpha = 0 - (float(1) / N_i)
Sig_alpha_W_i[k] += (alpha * W_i[j])
Sig_alpha_W_i = (1. / self.datanum) * np.transpose(Sig_alpha_W_i)
return Sig_alpha_W_i
# This function calculates log of our prior
def lnpdf(self, x):
x = x.reshape(self.x_shape)
#self.lamda.values[:] = self.lamda.values/self.lamda.values.sum()
xprime = x.dot(np.diagflat(self.lamda))
x = xprime
# print x
cls = self.compute_cls(x)
M_0 = np.mean(x, axis=0)
M_i = self.compute_Mi(cls)
Sb = self.compute_Sb(cls, M_i, M_0)
Sw = self.compute_Sw(cls, M_i)
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.5))[0]
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.9)[0]
return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw))
# This function calculates derivative of the log of prior function
def lnpdf_grad(self, x):
x = x.reshape(self.x_shape)
xprime = x.dot(np.diagflat(self.lamda))
x = xprime
# print x
cls = self.compute_cls(x)
M_0 = np.mean(x, axis=0)
M_i = self.compute_Mi(cls)
Sb = self.compute_Sb(cls, M_i, M_0)
Sw = self.compute_Sw(cls, M_i)
data_idx = self.compute_indices(x)
lst_idx_all = self.compute_listIndices(data_idx)
Sig_beta_B_i_all = self.compute_sig_beta_Bi(data_idx, M_i, M_0, lst_idx_all)
W_i = self.compute_wj(data_idx, M_i)
Sig_alpha_W_i = self.compute_sig_alpha_W(data_idx, lst_idx_all, W_i)
# Calculating inverse of Sb and its transpose and minus
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.5))[0]
Sb_inv_N = pdinv(Sb + np.eye(Sb.shape[0])*0.9)[0]
Sb_inv_N_trans = np.transpose(Sb_inv_N)
Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
Sw_trans = np.transpose(Sw)
# Calculating DJ/DXk
DJ_Dxk = 2 * (
Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + Sb_inv_N_trans.dot(
Sig_alpha_W_i))
# Calculating derivative of the log of the prior
DPx_Dx = ((-1 / self.sigma2) * DJ_Dxk)
DPxprim_Dx = np.diagflat(self.lamda).dot(DPx_Dx)
        # Because of GPy we need to transpose the matrix so that it has the same shape as our matrix (denominator layout)
DPxprim_Dx = DPxprim_Dx.T
DPxprim_Dlamda = DPx_Dx.dot(x)
        # Because of GPy we need to transpose the matrix so that it has the same shape as our matrix (denominator layout)
DPxprim_Dlamda = DPxprim_Dlamda.T
self.lamda.gradient = np.diag(DPxprim_Dlamda)
# print DPxprim_Dx
return DPxprim_Dx
# def frb(self, x):
# from functools import partial
# from GPy.models import GradientChecker
# f = partial(self.lnpdf)
# df = partial(self.lnpdf_grad)
# grad = GradientChecker(f, df, x, 'X')
# grad.checkgrad(verbose=1)
def rvs(self, n):
return np.random.rand(n) # A WRONG implementation
def __str__(self):
return 'DGPLVM_prior_Raq_Lamda'
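The scatter computations above (`compute_Sb`, `compute_Sw`) use N_i/N class weights, under which the between-class and within-class scatter matrices sum to the total scatter about the global mean. A minimal standalone sketch, on hypothetical random data, checking that identity:

```python
import numpy as np

# Hypothetical data: 10 points in 2-D split into two classes.
rng = np.random.default_rng(0)
X = rng.normal(size=(10, 2))
labels = np.array([0] * 4 + [1] * 6)
N = len(X)
M0 = X.mean(axis=0)

Sb = np.zeros((2, 2))
Sw = np.zeros((2, 2))
for c in (0, 1):
    Xc = X[labels == c]
    Mc = Xc.mean(axis=0)
    B = (Mc - M0).reshape(-1, 1)
    Sb += (len(Xc) / N) * B @ B.T              # between-class scatter, as in compute_Sb
    Sw += (1.0 / N) * (Xc - Mc).T @ (Xc - Mc)  # within-class scatter, as in compute_Sw

St = (1.0 / N) * (X - M0).T @ (X - M0)         # total scatter about the global mean
assert np.allclose(Sb + Sw, St)
```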
# ******************************************
class DGPLVM_T(Prior):
"""
Implementation of the Discriminative Gaussian Process Latent Variable model paper, by Raquel.
:param sigma2: constant
.. Note:: DGPLVM for Classification paper implementation
"""
domain = _REAL
# _instances = []
# def __new__(cls, mu, sigma): # Singleton:
# if cls._instances:
# cls._instances[:] = [instance for instance in cls._instances if instance()]
# for instance in cls._instances:
# if instance().mu == mu and instance().sigma == sigma:
# return instance()
# o = super(Prior, cls).__new__(cls, mu, sigma)
# cls._instances.append(weakref.ref(o))
# return cls._instances[-1]()
def __init__(self, sigma2, lbl, x_shape, vec):
self.sigma2 = sigma2
# self.x = x
self.lbl = lbl
self.classnum = lbl.shape[1]
self.datanum = lbl.shape[0]
self.x_shape = x_shape
self.dim = x_shape[1]
self.vec = vec
def get_class_label(self, y):
for idx, v in enumerate(y):
if v == 1:
return idx
return -1
# This function assigns each data point to its own class
# and returns the dictionary which contains the class name and parameters.
def compute_cls(self, x):
cls = {}
# Appending each data point to its proper class
for j in range(self.datanum):
class_label = self.get_class_label(self.lbl[j])
if class_label not in cls:
cls[class_label] = []
cls[class_label].append(x[j])
return cls
# This function computes mean of each class. The mean is calculated through each dimension
def compute_Mi(self, cls):
M_i = np.zeros((self.classnum, self.dim))
for i in cls:
# Mean of each class
# class_i = np.multiply(cls[i],vec)
class_i = cls[i]
M_i[i] = np.mean(class_i, axis=0)
return M_i
# Adding data points as tuple to the dictionary so that we can access indices
def compute_indices(self, x):
data_idx = {}
for j in range(self.datanum):
class_label = self.get_class_label(self.lbl[j])
if class_label not in data_idx:
data_idx[class_label] = []
t = (j, x[j])
data_idx[class_label].append(t)
return data_idx
    # Collect the indices of each class so we can access all of them later
def compute_listIndices(self, data_idx):
        lst_idx_all = []
        for i in data_idx:
            # Gather the index of every data point belonging to class i
            lst_idx = [data_idx[i][m][0] for m in range(len(data_idx[i]))]
            lst_idx_all.append(lst_idx)
return lst_idx_all
    # This function calculates the between-class scatter matrix
def compute_Sb(self, cls, M_i, M_0):
Sb = np.zeros((self.dim, self.dim))
for i in cls:
B = (M_i[i] - M_0).reshape(self.dim, 1)
B_trans = B.transpose()
Sb += (float(len(cls[i])) / self.datanum) * B.dot(B_trans)
return Sb
    # This function calculates the within-class scatter matrix
def compute_Sw(self, cls, M_i):
Sw = np.zeros((self.dim, self.dim))
for i in cls:
N_i = float(len(cls[i]))
W_WT = np.zeros((self.dim, self.dim))
for xk in cls[i]:
W = (xk - M_i[i])
W_WT += np.outer(W, W)
Sw += (N_i / self.datanum) * ((1. / N_i) * W_WT)
return Sw
# Calculating beta and Bi for Sb
def compute_sig_beta_Bi(self, data_idx, M_i, M_0, lst_idx_all):
B_i = np.zeros((self.classnum, self.dim))
Sig_beta_B_i_all = np.zeros((self.datanum, self.dim))
for i in data_idx:
# Calculating Bi
B_i[i] = (M_i[i] - M_0).reshape(1, self.dim)
for k in range(self.datanum):
for i in data_idx:
N_i = float(len(data_idx[i]))
if k in lst_idx_all[i]:
beta = (float(1) / N_i) - (float(1) / self.datanum)
Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
else:
beta = -(float(1) / self.datanum)
Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
Sig_beta_B_i_all = Sig_beta_B_i_all.transpose()
return Sig_beta_B_i_all
# Calculating W_j s separately so we can access all the W_j s anytime
def compute_wj(self, data_idx, M_i):
W_i = np.zeros((self.datanum, self.dim))
for i in data_idx:
N_i = float(len(data_idx[i]))
for tpl in data_idx[i]:
xj = tpl[1]
j = tpl[0]
W_i[j] = (xj - M_i[i])
return W_i
# Calculating alpha and Wj for Sw
def compute_sig_alpha_W(self, data_idx, lst_idx_all, W_i):
Sig_alpha_W_i = np.zeros((self.datanum, self.dim))
for i in data_idx:
N_i = float(len(data_idx[i]))
for tpl in data_idx[i]:
k = tpl[0]
for j in lst_idx_all[i]:
if k == j:
alpha = 1 - (float(1) / N_i)
Sig_alpha_W_i[k] += (alpha * W_i[j])
else:
alpha = 0 - (float(1) / N_i)
Sig_alpha_W_i[k] += (alpha * W_i[j])
Sig_alpha_W_i = (1. / self.datanum) * np.transpose(Sig_alpha_W_i)
return Sig_alpha_W_i
# This function calculates log of our prior
def lnpdf(self, x):
x = x.reshape(self.x_shape)
xprim = x.dot(self.vec)
x = xprim
# print x
cls = self.compute_cls(x)
M_0 = np.mean(x, axis=0)
M_i = self.compute_Mi(cls)
Sb = self.compute_Sb(cls, M_i, M_0)
Sw = self.compute_Sw(cls, M_i)
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
#print 'SB_inv: ', Sb_inv_N
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
Sb_inv_N = pdinv(Sb+np.eye(Sb.shape[0])*0.1)[0]
return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw))
# This function calculates derivative of the log of prior function
def lnpdf_grad(self, x):
x = x.reshape(self.x_shape)
xprim = x.dot(self.vec)
x = xprim
# print x
cls = self.compute_cls(x)
M_0 = np.mean(x, axis=0)
M_i = self.compute_Mi(cls)
Sb = self.compute_Sb(cls, M_i, M_0)
Sw = self.compute_Sw(cls, M_i)
data_idx = self.compute_indices(x)
lst_idx_all = self.compute_listIndices(data_idx)
Sig_beta_B_i_all = self.compute_sig_beta_Bi(data_idx, M_i, M_0, lst_idx_all)
W_i = self.compute_wj(data_idx, M_i)
Sig_alpha_W_i = self.compute_sig_alpha_W(data_idx, lst_idx_all, W_i)
# Calculating inverse of Sb and its transpose and minus
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
#print 'SB_inv: ',Sb_inv_N
#Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
Sb_inv_N = pdinv(Sb+np.eye(Sb.shape[0])*0.1)[0]
Sb_inv_N_trans = np.transpose(Sb_inv_N)
Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
Sw_trans = np.transpose(Sw)
# Calculating DJ/DXk
DJ_Dxk = 2 * (
Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + Sb_inv_N_trans.dot(
Sig_alpha_W_i))
# Calculating derivative of the log of the prior
DPx_Dx = ((-1 / self.sigma2) * DJ_Dxk)
return DPx_Dx.T
# def frb(self, x):
# from functools import partial
# from GPy.models import GradientChecker
# f = partial(self.lnpdf)
# df = partial(self.lnpdf_grad)
# grad = GradientChecker(f, df, x, 'X')
# grad.checkgrad(verbose=1)
def rvs(self, n):
return np.random.rand(n) # A WRONG implementation
def __str__(self):
return 'DGPLVM_prior_Raq_TTT'
class HalfT(Prior):
"""
    Implementation of the half-Student-t probability density function, coupled with random variables.
:param A: scale parameter
:param nu: degrees of freedom
"""
domain = _POSITIVE
_instances = []
def __new__(cls, A, nu): # Singleton:
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
if instance().A == A and instance().nu == nu:
return instance()
o = super(Prior, cls).__new__(cls, A, nu)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, A, nu):
self.A = float(A)
self.nu = float(nu)
self.constant = gammaln(.5*(self.nu+1.)) - gammaln(.5*self.nu) - .5*np.log(np.pi*self.A*self.nu)
def __str__(self):
return "hT({:.2g}, {:.2g})".format(self.A, self.nu)
def lnpdf(self, theta):
return (theta > 0) * (self.constant - .5*(self.nu + 1) * np.log(1. + (1./self.nu) * (theta/self.A)**2))
def lnpdf_grad(self, theta):
theta = theta if isinstance(theta, np.ndarray) else np.array([theta])
grad = np.zeros_like(theta)
above_zero = theta > 1e-6
v = self.nu
sigma2 = self.A
grad[above_zero] = -0.5*(v+1)*(2*theta[above_zero])/(v*sigma2 + theta[above_zero][0]**2)
return grad
    def rvs(self, n):
        from scipy.stats import t
        # Take absolute values to fold Student-t samples onto the positive half-line;
        # truncating negatives to zero would put a point mass at 0
        return np.abs(t.rvs(self.nu, loc=0, scale=self.A, size=n))
class Exponential(Prior):
"""
Implementation of the Exponential probability function,
coupled with random variables.
    :param l: rate parameter
"""
domain = _POSITIVE
_instances = []
def __new__(cls, l): # Singleton:
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
if instance().l == l:
return instance()
o = super(Exponential, cls).__new__(cls, l)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, l):
self.l = l
def __str__(self):
return "Exp({:.2g})".format(self.l)
def summary(self):
ret = {"E[x]": 1. / self.l,
"E[ln x]": np.nan,
"var[x]": 1. / self.l**2,
"Entropy": 1. - np.log(self.l),
"Mode": 0.}
return ret
def lnpdf(self, x):
return np.log(self.l) - self.l * x
def lnpdf_grad(self, x):
return - self.l
    def rvs(self, n):
        # lnpdf uses the rate parameterisation (log l - l*x), so the sampler's scale is 1/l
        return np.random.exponential(scale=1. / self.l, size=n)
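Since `lnpdf` above is the rate parameterisation (`log(l) - l*x`), matching samples must be drawn with scale `1/l`; NumPy's exponential sampler takes a scale, not a rate. A small standalone consistency sketch with a hypothetical rate `l = 2`:

```python
import numpy as np

# With rate l, the density is l*exp(-l*x), i.e. lnpdf = log(l) - l*x,
# and the mean is 1/l; NumPy's exponential sampler takes scale = 1/rate.
l = 2.0
x = np.linspace(0.1, 5.0, 50)
lnp = np.log(l) - l * x
assert np.allclose(lnp, np.log(l * np.exp(-l * x)))

rng = np.random.default_rng(0)
samples = rng.exponential(scale=1.0 / l, size=200_000)
assert abs(samples.mean() - 1.0 / l) < 0.01   # sample mean close to 1/l
```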
class StudentT(Prior):
"""
Implementation of the student t probability function, coupled with random variables.
:param mu: mean
:param sigma: standard deviation
:param nu: degrees of freedom
.. Note:: Bishop 2006 notation is used throughout the code
"""
domain = _REAL
_instances = []
def __new__(cls, mu=0, sigma=1, nu=4): # Singleton:
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
if instance().mu == mu and instance().sigma == sigma and instance().nu == nu:
return instance()
newfunc = super(Prior, cls).__new__
if newfunc is object.__new__:
o = newfunc(cls)
else:
o = newfunc(cls, mu, sigma, nu)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, mu, sigma, nu):
self.mu = float(mu)
self.sigma = float(sigma)
self.sigma2 = np.square(self.sigma)
self.nu = float(nu)
def __str__(self):
return "St({:.2g}, {:.2g}, {:.2g})".format(self.mu, self.sigma, self.nu)
def lnpdf(self, x):
from scipy.stats import t
return t.logpdf(x,self.nu,self.mu,self.sigma)
def lnpdf_grad(self, x):
return -(self.nu + 1.)*(x - self.mu)/( self.nu*self.sigma2 + np.square(x - self.mu) )
def rvs(self, n):
from scipy.stats import t
ret = t.rvs(self.nu, loc=self.mu, scale=self.sigma, size=n)
return ret
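The closed form in `lnpdf_grad` can be checked numerically against the Student-t log-density. A standalone sketch (stdlib only, hypothetical parameter values and helper names) using the same (mu, sigma, nu) parameterisation as the class above:

```python
import math

mu, sigma, nu = 0.5, 1.5, 4.0
sigma2 = sigma * sigma

def t_lnpdf(x):
    # Student-t log-density in (mu, sigma, nu) parameterisation
    return (math.lgamma((nu + 1) / 2) - math.lgamma(nu / 2)
            - 0.5 * math.log(nu * math.pi * sigma2)
            - 0.5 * (nu + 1) * math.log(1 + (x - mu) ** 2 / (nu * sigma2)))

def t_lnpdf_grad(x):
    # Same closed form as StudentT.lnpdf_grad above
    return -(nu + 1.0) * (x - mu) / (nu * sigma2 + (x - mu) ** 2)

# Central finite difference agrees with the analytic gradient
x, h = 2.0, 1e-6
numeric = (t_lnpdf(x + h) - t_lnpdf(x - h)) / (2 * h)
assert abs(numeric - t_lnpdf_grad(x)) < 1e-6
```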
================================================
FILE: transopt/utils/Read.py
================================================
import os
import pandas as pd
import requests
def read_file(file_path) -> pd.DataFrame:
_, file_extension = os.path.splitext(file_path)
if file_extension:
# Determine and read based on file extension
if file_extension == '.json':
return pd.read_json(file_path)
elif file_extension == '.txt':
return pd.read_csv(file_path, sep='\t') # Adjust delimiter as needed
elif file_extension == '.csv':
df = pd.read_csv(file_path)
            unnamed_columns = [col for col in df.columns if "Unnamed" in col]
df.drop(unnamed_columns, axis=1, inplace=True)
return df
elif file_extension in ['.xls', '.xlsx']:
return pd.read_excel(file_path)
else:
raise ValueError(f"Unsupported file type: {file_extension}")
    else:
        # No file extension; try each reader in turn until one succeeds
        readers = (
            pd.read_csv,
            pd.read_excel,
            pd.read_json,
            lambda p: pd.read_csv(p, sep='\t'),  # possibly a TXT file
        )
        for reader in readers:
            try:
                return reader(file_path)
            except Exception:
                continue  # This format failed; try the next one
        raise ValueError("File could not be read with any method. Ensure the file format is correct.")
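The extension-dispatch pattern used by `read_file` can be illustrated with a stdlib-only sketch (hypothetical `read_rows` helper returning lists of dicts instead of DataFrames):

```python
import csv
import io
import json

def read_rows(name, text):
    # Dispatch on the file extension, mirroring read_file above
    if name.endswith(".json"):
        return json.loads(text)
    if name.endswith(".csv"):
        return list(csv.DictReader(io.StringIO(text)))
    if name.endswith(".txt"):
        return list(csv.DictReader(io.StringIO(text), delimiter="\t"))
    raise ValueError(f"Unsupported file type: {name}")

rows = read_rows("data.csv", "a,b\n1,2\n3,4\n")
assert rows == [{"a": "1", "b": "2"}, {"a": "3", "b": "4"}]
assert read_rows("data.json", '{"k": 1}') == {"k": 1}
```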
def read_url(url):
    # URL patterns identifying the UCI and OpenML repositories
    uci_pattern = "archive.ics.uci.edu"
    openml_pattern = "openml.org"
    # Initialise the dataset source
    data_source = None
    # Try to download the data from the URL
    try:
        response = requests.get(url)
        data = response.text
        # Detect whether the URL points to UCI or OpenML
        if uci_pattern in url:
            data_source = "UCI"
        elif openml_pattern in url:
            data_source = "OpenML"
        # Return the data together with its source
        return data, data_source
    except requests.RequestException:
        return None, data_source
================================================
FILE: transopt/utils/Visualization.py
================================================
import os
import warnings
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
from itertools import product
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
from benchmark.synthetic import synthetic_problems
from transopt.utils.serialization import ndarray_to_vectors, vectors_to_ndarray
from transopt.utils.Normalization import normalize
def visual_contour(
optimizer,
testsuites,
train_x,
train_y,
Ac_candi,
test_size=101,
ac_model=None,
dtype=np.float64,
):
# Initialize plots
f, ax = plt.subplots(2, 2, figsize=(16, 16))
search_space_info = optimizer.get_spaceinfo("search")
var_name = [var["name"] for var in search_space_info]
search_bound = [(var["domain"][0], var["domain"][1]) for var in search_space_info]
# optimizers = problem.optimizers
xgrid_0, xgrid_1 = np.meshgrid(
np.linspace(search_bound[0][0], search_bound[0][1], test_size, dtype=dtype),
np.linspace(search_bound[1][0], search_bound[1][1], test_size, dtype=dtype),
)
test_x = np.concatenate(
(
xgrid_0.reshape((xgrid_0.shape[0] * xgrid_0.shape[1], 1)),
xgrid_1.reshape((xgrid_0.shape[0] * xgrid_0.shape[1], 1)),
),
axis=1,
)
test_vec = ndarray_to_vectors(var_name, test_x)
observed_pred_y, observed_corv = optimizer.predict(test_x)
observed_pred_y = observed_pred_y.reshape(xgrid_0.shape)
observed_corv = observed_corv.reshape(xgrid_0.shape)
# Calculate the true value
test_x_design = [optimizer._to_designspace(v) for v in test_vec]
testsuites.lock()
test_y = testsuites.f(test_x_design)
test_y = [y["function_value"] for y in test_y]
mean = np.mean(train_y)
std = np.std(train_y)
test_y = normalize(test_y, mean, std)
test_y = np.array(test_y).reshape(xgrid_0.shape)
# Calculate EI for the problem
if ac_model is not None:
test_ei = ac_model._compute_acq(test_x)
test_ei = test_ei.reshape(xgrid_0.shape)
candidate = optimizer._to_searchspace(Ac_candi[0])
candidate = [v for x, v in candidate.items()]
def ax_plot(title, ax, train_x, plot_y, test_size, cmap):
ax.plot(train_x[:, 0], train_x[:, 1], "k*")
# Predictive mean as blue line
with warnings.catch_warnings():
warnings.simplefilter("ignore")
h1 = ax.contourf(
xgrid_0,
xgrid_1,
plot_y,
np.arange(-3, 3.5, 0.5),
cmap=cmap,
)
c1 = plt.colorbar(h1, ax=ax)
# ax.clabel(C, inline=True)
min_loc_1 = (
int(np.argmin(plot_y) / test_size),
np.remainder(np.argmin(plot_y), test_size),
)
ax.plot(xgrid_0[min_loc_1], xgrid_1[min_loc_1], "b*")
ax.set_xlim([-1, 1])
ax.set_title(title)
    # Plot the true contour in the left plot
ax_plot(
"iter_" + str(train_x.shape[0]),
ax[0][0],
train_x,
test_y.reshape(xgrid_0.shape),
test_size,
cm.Reds,
)
ax_plot(
"Prediction",
ax[0][1],
train_x,
observed_pred_y.reshape(xgrid_0.shape),
test_size,
cm.Blues,
)
def ax_plot_ei(title, ax, train_x, plot_ei, candidate, cmap):
# Predictive mean as blue line
h1 = ax.contourf(xgrid_0, xgrid_1, plot_ei, np.arange(-3, 3.5, 0.5), cmap=cmap)
c1 = plt.colorbar(h1, ax=ax)
max_loc = (
int(np.argmax(plot_ei) / test_size),
np.remainder(np.argmax(plot_ei), test_size),
)
ax.plot(xgrid_0[max_loc], xgrid_1[max_loc], "g*")
ax.plot(candidate[0], candidate[1], color="orange", marker="*", linewidth=0)
ax.set_title(title)
if ac_model is not None:
ax_plot_ei(
"Acquisition Function", ax[1][1], train_x, test_ei, candidate, cm.Greens
)
    # Plot the covariance contour in the last row
ax_plot(
"Prediction covariance", ax[1][0], train_x, observed_corv, test_size, cm.Blues
)
plt.grid()
Exper_folder = optimizer.exp_path
if not os.path.exists(
"{}/verbose/contour/{}/{}".format(
Exper_folder, optimizer.optimizer_name, f"{testsuites.get_curname()}"
)
):
os.makedirs(
"{}/verbose/contour/{}/{}".format(
Exper_folder, optimizer.optimizer_name, f"{testsuites.get_curname()}"
)
)
plt.savefig(
"{}/verbose/contour/{}/{}/{}.png".format(
Exper_folder,
optimizer.optimizer_name,
f"{testsuites.get_curname()}",
f"iter_{testsuites.get_query_num()}",
),
format="png",
)
plt.close()
testsuites.unlock()
def visual_oned(
optimizer, testsuites, train_x, train_y, Ac_candi, ac_model=None, dtype=np.float64
):
# Initialize plots
f, ax = plt.subplots(1, 1, figsize=(8, 8))
search_space_info = optimizer.get_spaceinfo("search")
var_name = [var["name"] for var in search_space_info]
search_bound = [
search_space_info[0]["domain"][0],
search_space_info[0]["domain"][1],
]
test_x = np.arange(search_bound[0], search_bound[1] + 0.005, 0.005, dtype=dtype)
observed_pred_y, observed_corv = optimizer.predict(test_x[:, np.newaxis])
test_vec = ndarray_to_vectors(var_name, test_x[:, np.newaxis])
# Calculate the true value
test_x_design = [optimizer._to_designspace(v) for v in test_vec]
testsuites.lock()
test_y = testsuites.f(test_x_design)
test_y = np.array([y["function_value"] for y in test_y])
y_mean = np.mean(train_y)
y_std = np.std(train_y)
test_y = normalize(test_y, y_mean, y_std)
train_y_temp = normalize(train_y, y_mean, y_std)
# Calculate EI for the problem
if ac_model is not None:
test_ei = ac_model._compute_acq(test_x[:, np.newaxis])
pre_mean = observed_pred_y
pre_best_y = np.min(pre_mean)
pre_best_x = test_x[np.argmin(pre_mean)]
pre_up = observed_pred_y + observed_corv
pre_low = observed_pred_y - observed_corv
ax.plot(test_x, test_y, "r-", linewidth=1, alpha=1)
ax.plot(test_x, pre_mean[:, 0], "b-", linewidth=1, alpha=1)
if ac_model is not None:
ax.plot(test_x, test_ei[:, 0], "g-", linewidth=1, alpha=1)
candidate = optimizer._to_searchspace(Ac_candi[0])
ax.plot(train_x[:, 0], train_y_temp[:, 0], marker="*", color="black", linewidth=0)
ax.plot(candidate[var_name[0]], 0, marker="*", color="orange", linewidth=0)
ax.plot(pre_best_x, pre_best_y, marker="*", color="blue", linewidth=0)
ax.fill_between(test_x, pre_up[:, 0], pre_low[:, 0], alpha=0.2, facecolor="blue")
Exper_folder = optimizer.exp_path
if not os.path.exists(
"{}/verbose/oneD/{}/{}".format(
Exper_folder, optimizer.optimizer_name, f"{testsuites.get_curname()}"
)
):
os.makedirs(
"{}/verbose/oneD/{}/{}".format(
Exper_folder, optimizer.optimizer_name, f"{testsuites.get_curname()}"
)
)
ax.legend()
plt.grid()
    plt.savefig(
        "{}/verbose/oneD/{}/{}/{}.png".format(
            Exper_folder,
            optimizer.optimizer_name,
            f"{testsuites.get_curname()}",
            f"iter_{testsuites.get_query_num()}",
        ),
        format="png",
    )
os.makedirs(
"{}/verbose/oneD/{}/{}/".format(
Exper_folder, optimizer.optimizer_name, f"{testsuites.get_curname()}"
),
exist_ok=True,
)
np.savetxt(
"{}/verbose/oneD/{}/{}/{}_true.txt".format(
Exper_folder,
optimizer.optimizer_name,
f"{testsuites.get_curname()}",
f"{testsuites.get_query_num()}",
),
np.concatenate((test_x[:, np.newaxis], test_y[:, np.newaxis]), axis=1),
)
np.savetxt(
"{}/verbose/oneD/{}/{}/{}_pred_y.txt".format(
Exper_folder,
optimizer.optimizer_name,
f"{testsuites.get_curname()}",
f"{testsuites.get_query_num()}",
),
np.concatenate((test_x[:, np.newaxis], observed_pred_y), axis=1),
)
np.savetxt(
"{}/verbose/oneD/{}/{}/{}_cov_lower.txt".format(
Exper_folder,
optimizer.optimizer_name,
f"{testsuites.get_curname()}",
f"{testsuites.get_query_num()}",
),
np.concatenate(
(test_x[:, np.newaxis], observed_pred_y - observed_corv), axis=1
),
)
np.savetxt(
"{}/verbose/oneD/{}/{}/{}_cov_higher.txt".format(
Exper_folder,
optimizer.optimizer_name,
f"{testsuites.get_curname()}",
f"{testsuites.get_query_num()}",
),
np.concatenate(
(test_x[:, np.newaxis], observed_pred_y + observed_corv), axis=1
),
)
if ac_model is not None:
np.savetxt(
"{}/verbose/oneD/{}/{}/{}_ei.txt".format(
Exper_folder,
optimizer.optimizer_name,
f"{testsuites.get_curname()}",
f"{testsuites.get_query_num()}",
),
np.concatenate((test_x[:, np.newaxis], test_ei), axis=1),
)
np.savetxt(
"{}/verbose/oneD/{}/{}/{}_train.txt".format(
Exper_folder,
optimizer.optimizer_name,
f"{testsuites.get_curname()}",
f"{testsuites.get_query_num()}",
),
np.concatenate((train_x, train_y_temp), axis=1),
)
plt.close()
testsuites.unlock()
def visual_pf(
optimizer, testsuites, train_x, train_y, Ac_candi, ac_model=None, dtype=np.float64
):
f, ax = plt.subplots(1, 1, figsize=(8, 8))
search_space_info = optimizer.get_spaceinfo("search")
final_pfront = pareto.find_pareto_only_y(obs_points_dic["ParEGO"])
pfront_sorted = final_pfront[final_pfront[:, 0].argsort(), :]
plt.scatter(pfront_sorted[:, 0], pfront_sorted[:, 1], c="r", label="ParEGO")
plt.vlines(pfront_sorted[0, 0], ymin=pfront_sorted[0, 1], ymax=w_ref[1], colors="r")
for i in range(pfront_sorted.shape[0] - 1):
plt.hlines(
y=pfront_sorted[i, 1],
xmin=pfront_sorted[i, 0],
xmax=pfront_sorted[i + 1, 0],
colors="r",
)
plt.vlines(
x=pfront_sorted[i + 1, 0],
ymin=pfront_sorted[i + 1, 1],
ymax=pfront_sorted[i, 1],
colors="r",
)
plt.hlines(
y=pfront_sorted[-1, 1], xmin=pfront_sorted[-1, 0], xmax=w_ref[0], colors="r"
)
var_name = [var["name"] for var in search_space_info]
search_bound = [
search_space_info[0]["domain"][0],
search_space_info[0]["domain"][1],
]
test_x = np.arange(search_bound[0], search_bound[1] + 0.005, 0.005, dtype=dtype)
observed_pred_y, observed_corv = optimizer.predict(test_x[:, np.newaxis])
test_vec = ndarray_to_vectors(var_name, test_x[:, np.newaxis])
# Calculate the true value
test_x_design = [optimizer._to_designspace(v) for v in test_vec]
testsuites.lock()
test_y = testsuites.f(test_x_design)
test_y = np.array([y["function_value"] for y in test_y])
y_mean = np.mean(train_y)
y_std = np.std(train_y)
test_y = normalize(test_y, y_mean, y_std)
train_y_temp = normalize(train_y, y_mean, y_std)
================================================
FILE: transopt/utils/__init__.py
================================================
================================================
FILE: transopt/utils/check.py
================================================
import os
import re
import requests
import ipaddress
from urllib.parse import urlparse
def check_dir(self):
# Validate path
if self.path and not (os.path.exists(self.path) and os.path.isfile(self.path)):
raise ValueError("Provided path is not a valid file")
def check_url(url):
try:
result = urlparse(url)
return all([result.scheme, result.netloc])
except:
return False
def check_ip_address(ip_address):
try:
ipaddress.ip_address(ip_address)
return True
except ValueError:
return False
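The validators above rely on `urlparse` and `ipaddress` from the standard library; a quick standalone sketch of the same checks (hypothetical `is_url`/`is_ip` names):

```python
import ipaddress
from urllib.parse import urlparse

def is_url(u):
    # Same criterion as check_url: both a scheme and a network location
    r = urlparse(u)
    return all([r.scheme, r.netloc])

def is_ip(s):
    # Same criterion as check_ip_address
    try:
        ipaddress.ip_address(s)
        return True
    except ValueError:
        return False

assert is_url("https://archive.ics.uci.edu/dataset/53/iris")
assert not is_url("not a url")
assert is_ip("192.168.0.1") and not is_ip("999.0.0.1")
```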
================================================
FILE: transopt/utils/encoding.py
================================================
import pandas as pds
def target_encoding(df: pds.DataFrame, column_name, target_name):
    """
    Compute the target encoding for a given column.

    Parameters:
        df (pandas.DataFrame): DataFrame containing the feature and target columns.
        column_name (str): Name of the column to target-encode.
        target_name (str): Name of the target column.

    Returns:
        dict: A dictionary mapping each unique value to its target encoding.
    """
    # Compute the target mean for each unique value
    target_mean = df.groupby(column_name)[target_name].mean()
    target_rank = target_mean.rank(method='average')
    df['mean_encoding'] = df.groupby(column_name)[target_name].transform('mean')
    df['rank_encoding'] = df[column_name].map(target_rank)
    print(df[[column_name, target_name, 'mean_encoding', 'rank_encoding']].head(10))
    encodings = {value: key for key, value in target_rank.to_dict().items()}
    # Return the resulting dictionary
    return encodings
def multitarget_encoding(df: pds.DataFrame, column_name, target_names):
    encodings = {}
    for target in target_names:
        # Compute the target mean for each unique value
        target_mean = df.groupby(column_name)[target].mean()
        encodings[target] = target_mean.to_dict()
return encodings
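The per-category target mean that `target_encoding` computes via `groupby(...).mean()` can be reproduced with a stdlib-only sketch on hypothetical data:

```python
from collections import defaultdict

cats = ["a", "a", "b", "b", "b"]
targets = [1.0, 3.0, 2.0, 2.0, 5.0]

# Accumulate per-category sums and counts, then take the mean
sums, counts = defaultdict(float), defaultdict(int)
for c, t in zip(cats, targets):
    sums[c] += t
    counts[c] += 1
means = {c: sums[c] / counts[c] for c in sums}
assert means == {"a": 2.0, "b": 3.0}
```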
================================================
FILE: transopt/utils/hypervolume.py
================================================
import numpy as np
import itertools as it
def find_pareto(X, y):
"""
find pareto set in X and pareto frontier in y
Paremeters
----------
X : numpy.array
input data
y : numpy.array
output data
Return
------
pareto_front : numpy.array
pareto frontier in y
pareto_set : numpy.array
pareto set in X
"""
    y_copy = np.copy(y)
    pareto_front = np.zeros((0, y.shape[1]))
    pareto_set = np.zeros((0, X.shape[1]))
    i = 0
    j = 0
    while i < y_copy.shape[0]:
        y_outi = np.delete(y_copy, i, axis=0)
        # If the point is Pareto-optimal, every comparison is False
        flag = np.all(y_outi <= y_copy[i, :], axis=1)
        if not np.any(flag):
            pareto_front = np.append(pareto_front, [y_copy[i, :]], axis=0)
            pareto_set = np.append(pareto_set, [X[j, :]], axis=0)
            i += 1
        else:
            y_copy = np.delete(y_copy, i, axis=0)
        j += 1
return pareto_front, pareto_set
def find_pareto_only_y(y):
"""
obtain only pareto frontier in y
Parameters
----------
y : numpy.array
output data
Returns
-------
pareto_front : numpy.array
pareto frontier in y
"""
    y_copy = np.copy(y)
    pareto_front = np.zeros((0, y.shape[1]))
    i = 0
    while i < y_copy.shape[0]:
        y_outi = np.delete(y_copy, i, axis=0)
        # If the point is Pareto-optimal, every comparison is False
        flag = np.all(y_outi <= y_copy[i, :], axis=1)
        if not np.any(flag):
            pareto_front = np.append(pareto_front, [y_copy[i, :]], axis=0)
            i += 1
        else:
            y_copy = np.delete(y_copy, i, axis=0)
return pareto_front
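The domination test used above keeps a point iff no other point is `<=` it in every objective (all objectives minimised). A tiny worked example with hypothetical points:

```python
import numpy as np

y = np.array([[1., 4.], [2., 2.], [3., 3.], [3., 1.]])
keep = []
for i in range(len(y)):
    others = np.delete(y, i, axis=0)
    # Dominated iff some other point is <= in every objective
    if not np.any(np.all(others <= y[i], axis=1)):
        keep.append(y[i])
front = np.array(keep)
assert front.shape == (3, 2)                       # (3, 3) is dominated by (2, 2)
assert not any((front == [3., 3.]).all(axis=1))
```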
def create_cells(pf, ref, ref_inv=None):
    '''
    From the N points of a Pareto frontier, create an array of disjoint cells
    covering the region dominated by the frontier (objectives minimised).

    Parameters
    ----------
    pf : numpy array
        Pareto frontier (N x L)
    ref : numpy array
        reference point bounding the objectives from above (L)
    ref_inv : numpy array
        reference point bounding the objectives from below (L), kept for
        convenience of the computation

    Returns
    -------
    lower : numpy array
        lower bounds of the M cells of the region cut off by the Pareto frontier (M x L)
    upper : numpy array
        upper bounds of the M cells of the region cut off by the Pareto frontier (M x L)
    '''
    N, L = np.shape(pf)
    if ref_inv is None:
        ref_inv = np.min(pf, axis=0)
    if N == 1:
        # With a single point the cell can be returned directly
        return np.atleast_2d(pf), np.atleast_2d(ref)
    else:
        # Use as pivot the point that forms the largest hypervolume with ref
        hv = np.prod(pf - ref, axis=1)
        pivot_index = np.argmax(hv)
        pivot = pf[pivot_index]
        # The pivot itself directly becomes a cell
        lower = np.atleast_2d(pivot)
        upper = np.atleast_2d(ref)
        # Recurse over all 2^L index combinations
        for i in it.product(range(2), repeat=L):
            # Where every entry is 1 no Pareto frontier remains;
            # where every entry is 0 the cell is the simple one already added above
            iter_index = np.array(list(i)) == 0
            if (np.sum(iter_index) == 0) or (np.sum(iter_index) == L):
                continue
            # New reference point (replace the selected pivot coordinates with ref coordinates)
            new_ref = pivot.copy()
            new_ref[iter_index] = ref[iter_index]
            # New lower-side reference point, for convenience of the computation
            # (replace the selected lower reference coordinates with pivot coordinates)
            new_ref_inv = ref_inv.copy()
            new_ref_inv[iter_index] = pivot[iter_index]
            # Pareto solutions below new_ref in every dimension must be kept
            new_pf = pf[(pf < new_ref).all(axis=1), :]
            # Project points not dominated by new_ref_inv onto the hyper-rectangle
            # spanned by new_ref and new_ref_inv
            new_pf[new_pf < new_ref_inv] = np.tile(new_ref_inv, (new_pf.shape[0], 1))[new_pf < new_ref_inv]
            # Recurse on the reduced frontier
            if np.size(new_pf) > 0:
                child_lower, child_upper = create_cells(new_pf, new_ref, new_ref_inv)
                lower = np.r_[lower, np.atleast_2d(child_lower)]
                upper = np.r_[upper, np.atleast_2d(child_upper)]
return lower, upper
def find_pareto_from_posterior(X, mean, y):
"""
    Find the Pareto frontier in the predictive mean of the GPR and the Pareto set in X.
Parameters
----------
X : numpy.array
input data
mean : numpy.array
predict mean of GPR
y : numpy.array
output data
Returns
-------
pareto_front : numpy.array
pareto frontier in y defined by predict mean
pareto_set : numpy.array
pareto set in X
"""
    mean_copy = np.copy(mean)
    pareto_front = np.zeros((0, mean.shape[1]))
    pareto_set = np.zeros((0, X.shape[1]))
    i = 0
    j = 0
    while i < mean_copy.shape[0]:
        mean_outi = np.delete(mean_copy, i, axis=0)
        # If the point is Pareto-optimal, every comparison is False
        flag = np.all(mean_outi <= mean_copy[i, :], axis=1)
        if not np.any(flag):
            pareto_front = np.append(pareto_front, [y[j, :]], axis=0)
            pareto_set = np.append(pareto_set, [X[j, :]], axis=0)
            i += 1
        else:
            mean_copy = np.delete(mean_copy, i, axis=0)
        j += 1
return pareto_front, pareto_set
def calc_hypervolume(y, w_ref):
"""
calculate pareto hypervolume
Parameters
----------
y : numpy.array
output data
w_ref : numpy.array
reference point for calculating hypervolume
Returns
-------
hypervolume : float
pareto hypervolume
"""
hypervolume = 0.0e0
pareto_front = find_pareto_only_y(y)
v, w = create_cells(pareto_front, w_ref)
if v.ndim == 1:
hypervolume = np.prod(w - v)
else:
hypervolume = np.sum(np.prod(w - v, axis=1))
return hypervolume
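For two objectives, the result of `calc_hypervolume` can be cross-checked by summing the staircase area between the sorted front and the reference point. A standalone sketch with a hypothetical front:

```python
import numpy as np

front = np.array([[1., 4.], [2., 2.], [3., 1.]])   # mutually non-dominated (minimisation)
ref = np.array([5., 5.])

# Sort by the first objective and sum the dominated rectangles column by column
fs = front[front[:, 0].argsort()]
hv, prev_y = 0.0, ref[1]
for f1, f2 in fs:
    hv += (ref[0] - f1) * (prev_y - f2)
    prev_y = f2
assert hv == 12.0  # (5-1)*(5-4) + (5-2)*(4-2) + (5-3)*(2-1)
```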
================================================
FILE: transopt/utils/log.py
================================================
import logging
from rich.logging import RichHandler
loggers = {}
LOGGER_NAME = "Transopt"
def get_logger(logger_name: str) -> logging.Logger:
# https://rich.readthedocs.io/en/latest/reference/logging.html#rich.logging.RichHandler
# https://rich.readthedocs.io/en/latest/logging.html#handle-exceptions
if logger_name in loggers:
return loggers[logger_name]
_logger = logging.getLogger(logger_name)
rich_handler = RichHandler(
show_time=False,
rich_tracebacks=False,
show_path=True,
tracebacks_show_locals=False,
)
rich_handler.setFormatter(
logging.Formatter(
fmt="%(message)s",
datefmt="[%X]",
)
)
file_handler = logging.FileHandler('application.log')
file_handler.setFormatter(
logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
)
_logger.handlers.clear()
_logger.addHandler(rich_handler)
# _logger.addHandler(file_handler)
_logger.setLevel(logging.INFO)
_logger.propagate = False
loggers[logger_name] = _logger
return _logger
# logger = logging.getLogger(LOGGER_NAME)
# logger.setLevel(logging.DEBUG)
logger = get_logger(LOGGER_NAME)
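The per-name caching pattern used by `get_logger` can be sketched in isolation. `get_cached_logger` is a hypothetical stand-in using only the stdlib (no rich handler), included to show why repeated calls return the identical, already-configured object:

```python
import logging

_loggers = {}

def get_cached_logger(name):
    # Mirrors get_logger's caching: configure a logger once per name,
    # then reuse the same object on every subsequent call.
    if name in _loggers:
        return _loggers[name]
    lg = logging.getLogger(name)
    lg.setLevel(logging.INFO)
    lg.propagate = False
    _loggers[name] = lg
    return lg

print(get_cached_logger("demo") is get_cached_logger("demo"))  # True
```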
================================================
FILE: transopt/utils/openml_data_manager.py
================================================
"""
This file includes code adapted from HPOBench (https://github.com/automl/HPOBench),
which is licensed under the Apache License 2.0. A copy of the license can be
found at http://www.apache.org/licenses/LICENSE-2.0.
"""
""" OpenMLDataManager organizing the data for the benchmarks with data from
OpenML-tasks.
DataManager organizing the download of the data.
The load function of a DataManger downloads the data given an unique OpenML
identifier. It splits the data in train, test and optional validation splits.
It can be distinguished between holdout and cross-validation data sets.
For Non-OpenML data sets please use the hpobench.util.data_manager.
"""
import os
import abc
import logging
import tarfile
import requests
import openml
import numpy as np
from pathlib import Path
from typing import Tuple, List, Union
from zipfile import ZipFile
from oslo_concurrency import lockutils
from sklearn.model_selection import train_test_split
from transopt.utils.rng_helper import get_rng
# TODO: consider managing these paths via a config module
def _check_dir(path: Path):
""" Check whether dir exists and if not create it"""
Path(path).mkdir(exist_ok=True, parents=True)
cache_dir = os.environ.get('OPENML_CACHE_HOME', '~/.cache/transopt')
data_dir = os.environ.get('OPENML_DATA_HOME', '~/.local/share/transopt')
cache_dir = Path(cache_dir).expanduser().absolute()
data_dir = Path(data_dir).expanduser().absolute()
_check_dir(cache_dir)
_check_dir(data_dir)
def get_openml100_taskids():
"""
    Return task ids for the OpenML100 data sets
See also here: https://www.openml.org/s/14
Reference: https://arxiv.org/abs/1708.03731
"""
return [
258, 259, 261, 262, 266, 267, 271, 273, 275, 279, 283, 288, 2120,
2121, 2125, 336, 75093, 75092, 75095, 75097, 75099, 75103, 75107,
75106, 75109, 75108, 75112, 75129, 75128, 75135, 146574, 146575,
146572, 146573, 146578, 146579, 146576, 146577, 75154, 146582,
146583, 75156, 146580, 75159, 146581, 146586, 146587, 146584,
146585, 146590, 146591, 146588, 146589, 75169, 146594, 146595,
146592, 146593, 146598, 146599, 146596, 146597, 146602, 146603,
146600, 146601, 75181, 146604, 146605, 75215, 75217, 75219, 75221,
75225, 75227, 75231, 75230, 75232, 75235, 3043, 75236, 75239, 3047,
232, 233, 236, 3053, 3054, 3055, 241, 242, 244, 245, 246, 248, 250,
251, 252, 253, 254,
]
def get_openmlcc18_taskids():
"""
Return task ids for the OpenML-CC18 data sets
See also here: https://www.openml.org/s/99
TODO: ADD reference
"""
return [167149, 167150, 167151, 167152, 167153, 167154, 167155, 167156, 167157,
167158, 167159, 167160, 167161, 167162, 167163, 167165, 167166, 167167,
167168, 167169, 167170, 167171, 167164, 167173, 167172, 167174, 167175,
167176, 167177, 167178, 167179, 167180, 167181, 167182, 126025, 167195,
167194, 167190, 167191, 167192, 167193, 167187, 167188, 126026, 167189,
167185, 167186, 167183, 167184, 167196, 167198, 126029, 167197, 126030,
167199, 126031, 167201, 167205, 189904, 167106, 167105, 189905, 189906,
189907, 189908, 189909, 167083, 167203, 167204, 189910, 167202, 167097,
]
def _load_data(task_id: int):
""" Helper-function to load the data from the OpenML website. """
task = openml.tasks.get_task(task_id)
try:
        # This should raise a ValueError!
task.get_train_test_split_indices(fold=0, repeat=1)
raise AssertionError(f'Task {task_id} has more than one repeat. This '
f'benchmark can only work with a single repeat.')
except ValueError:
pass
try:
        # This should raise a ValueError!
task.get_train_test_split_indices(fold=1, repeat=0)
raise AssertionError(f'Task {task_id} has more than one fold. This '
f'benchmark can only work with a single fold.')
except ValueError:
pass
train_indices, test_indices = task.get_train_test_split_indices()
X, y = task.get_X_and_y()
X_train = X[train_indices]
y_train = y[train_indices]
X_test = X[test_indices]
y_test = y[test_indices]
# TODO replace by more efficient function which only reads in the data
# saved in the arff file describing the attributes/features
dataset = task.get_dataset()
_, _, categorical_indicator, _ = dataset.get_data(target=task.target_name)
variable_types = ['categorical' if ci else 'numerical' for ci in categorical_indicator]
return X_train, y_train, X_test, y_test, variable_types, dataset.name
class DataManager(abc.ABC, metaclass=abc.ABCMeta):
""" Base Class for loading and managing the data.
Attributes
----------
logger : logging.Logger
"""
def __init__(self):
self.logger = logging.getLogger("DataManager")
@abc.abstractmethod
def load(self):
""" Loads data from data directory as defined in
config_file.data_directory
"""
raise NotImplementedError()
def create_save_directory(self, save_dir: Path):
""" Helper function. Check if data directory exists. If not, create it.
Parameters
----------
save_dir : Path
Path to the directory. where the data should be stored
"""
if not save_dir.is_dir():
self.logger.debug(f'Create directory {save_dir}')
save_dir.mkdir(parents=True, exist_ok=True)
@lockutils.synchronized('not_thread_process_safe', external=True,
lock_path=f'{cache_dir}/lock_download_file', delay=0.5)
def _download_file_with_progressbar(self, data_url: str, data_file: Path):
data_file = Path(data_file)
if data_file.exists():
self.logger.info('Data File already exists. Skip downloading.')
return
self.logger.info(f"Download the file from {data_url} to {data_file}")
data_file.parent.mkdir(parents=True, exist_ok=True)
from tqdm import tqdm
r = requests.get(data_url, stream=True)
with open(data_file, 'wb') as f:
total_length = int(r.headers.get('content-length'))
for chunk in tqdm(r.iter_content(chunk_size=1024),
unit_divisor=1024, unit='kB', total=int(total_length / 1024) + 1):
if chunk:
_ = f.write(chunk)
f.flush()
self.logger.info(f"Finished downloading to {data_file}")
@lockutils.synchronized('not_thread_process_safe', external=True,
lock_path=f'{cache_dir}/lock_unzip_file', delay=0.5)
def _untar_data(self, compressed_file: Path, save_dir: Union[Path, None] = None):
self.logger.debug('Extract the compressed data')
with tarfile.open(compressed_file, 'r') as fh:
if save_dir is None:
save_dir = compressed_file.parent
fh.extractall(save_dir)
self.logger.debug(f'Successfully extracted the data to {save_dir}')
@lockutils.synchronized('not_thread_process_safe', external=True,
lock_path=f'{cache_dir}/lock_unzip_file', delay=0.5)
def _unzip_data(self, compressed_file: Path, save_dir: Union[Path, None] = None):
self.logger.debug('Extract the compressed data')
with ZipFile(compressed_file, 'r') as fh:
if save_dir is None:
save_dir = compressed_file.parent
fh.extractall(save_dir)
self.logger.debug(f'Successfully extracted the data to {save_dir}')
class HoldoutDataManager(DataManager):
""" Base Class for loading and managing the Holdout data sets.
Attributes
----------
X_train : np.ndarray
y_train : np.ndarray
X_valid : np.ndarray
y_valid : np.ndarray
X_test : np.ndarray
y_test : np.ndarray
"""
def __init__(self):
super().__init__()
self.X_train = None
self.y_train = None
self.X_valid = None
self.y_valid = None
self.X_test = None
self.y_test = None
class CrossvalidationDataManager(DataManager):
"""
Base Class for loading and managing the cross-validation data sets.
Attributes
----------
X_train : np.ndarray
y_train : np.ndarray
X_test : np.ndarray
y_test : np.ndarray
"""
def __init__(self):
super().__init__()
self.X_train = None
self.y_train = None
self.X_test = None
self.y_test = None
class OpenMLHoldoutDataManager(HoldoutDataManager):
""" Base class for loading holdout data set from OpenML.
Attributes
----------
task_id : int
rng : np.random.RandomState
name : str
variable_types : list
Indicating the type of each feature in the loaded data
(e.g. categorical, numerical)
Parameters
----------
openml_task_id : int
Unique identifier for the task on OpenML
rng : int, np.random.RandomState, None
defines the random state
"""
def __init__(self, openml_task_id: int, rng: Union[int, np.random.RandomState, None] = None):
super(OpenMLHoldoutDataManager, self).__init__()
self._save_to = data_dir / 'OpenML'
self.task_id = openml_task_id
self.rng = get_rng(rng=rng)
self.name = None
self.variable_types = None
self.create_save_directory(self._save_to)
openml.config.apikey = '610344db6388d9ba34f6db45a3cf71de'
openml.config.set_root_cache_directory(str(self._save_to))
def load(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray,
np.ndarray, np.ndarray, np.ndarray]:
"""
Loads dataset from OpenML in config_file.data_directory.
Downloads data if necessary.
Returns
-------
X_train: np.ndarray
y_train: np.ndarray
X_val: np.ndarray
y_val: np.ndarray
X_test: np.ndarray
y_test: np.ndarray
"""
self.X_train, self.y_train, self.X_test, self.y_test, self.variable_types, self.name = _load_data(self.task_id)
self.X_train, self.X_valid, self.y_train, self.y_valid = train_test_split(self.X_train,
self.y_train,
test_size=0.33,
stratify=self.y_train,
random_state=self.rng)
return self.X_train, self.y_train, self.X_valid, self.y_valid, self.X_test, self.y_test
@staticmethod
def replace_nans_in_cat_columns(X_train: np.ndarray, X_valid: np.ndarray, X_test: np.ndarray,
is_categorical: Union[np.ndarray, List]) \
-> Tuple[np.ndarray, np.ndarray, np.ndarray, List]:
""" Helper function to replace nan values in categorical features / columns by a non-used value.
Here: Min - 1.
"""
_cat_data = np.concatenate([X_train, X_valid, X_test], axis=0)
nan_index = np.isnan(_cat_data[:, is_categorical])
categories = [np.unique(_cat_data[:, i][~nan_index[:, i]])
for i in range(X_train.shape[1]) if is_categorical[i]]
replace_nans_with = np.nanmin(_cat_data[:, is_categorical], axis=0) - 1
categories = [np.concatenate([replace_value.flatten(), cat])
for (replace_value, cat) in zip(replace_nans_with, categories)]
def _find_and_replace(array, replace_nans_with):
nan_idx = np.where(np.isnan(array))
array[nan_idx] = np.take(replace_nans_with, nan_idx[1])
return array
X_train[:, is_categorical] = _find_and_replace(X_train[:, is_categorical], replace_nans_with)
X_valid[:, is_categorical] = _find_and_replace(X_valid[:, is_categorical], replace_nans_with)
X_test[:, is_categorical] = _find_and_replace(X_test[:, is_categorical], replace_nans_with)
return X_train, X_valid, X_test, categories
class OpenMLCrossvalidationDataManager(CrossvalidationDataManager):
""" Base class for loading cross-validation data set from OpenML.
Attributes
----------
task_id : int
rng : np.random.RandomState
name : str
variable_types : list
Indicating the type of each feature in the loaded data
(e.g. categorical, numerical)
Parameters
----------
openml_task_id : int
Unique identifier for the task on OpenML
rng : int, np.random.RandomState, None
defines the random state
"""
def __init__(self, openml_task_id: int, rng: Union[int, np.random.RandomState, None] = None):
super(OpenMLCrossvalidationDataManager, self).__init__()
self._save_to = data_dir / 'OpenML'
self.task_id = openml_task_id
self.rng = get_rng(rng=rng)
self.name = None
self.variable_types = None
self.create_save_directory(self._save_to)
openml.config.apikey = '610344db6388d9ba34f6db45a3cf71de'
        openml.config.set_root_cache_directory(str(self._save_to))
def load(self):
"""
Loads dataset from OpenML in config_file.data_directory.
Downloads data if necessary.
"""
X_train, y_train, X_test, y_test, variable_types, name = \
_load_data(self.task_id)
self.X_train = X_train
self.y_train = y_train
self.X_test = X_test
self.y_test = y_test
self.variable_types = variable_types
self.name = name
return self.X_train, self.y_train, self.X_test, self.y_test
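The "min - 1" NaN replacement performed by `replace_nans_in_cat_columns` above can be sketched on a single categorical column; the values here are purely illustrative:

```python
import numpy as np

# Replace NaNs in one categorical column with an otherwise-unused
# value: the minimum observed category code minus one.
col = np.array([2.0, np.nan, 5.0, 2.0])
fill = np.nanmin(col) - 1          # 1.0, not used by any real category
col[np.isnan(col)] = fill
print(col)  # [2. 1. 5. 2.]
```

Because the fill value is strictly below every real category code, downstream encoders treat missingness as its own category without colliding with existing ones.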
================================================
FILE: transopt/utils/pareto.py
================================================
'''
Pareto-related tools.
'''
import numpy as np
from collections.abc import Iterable
from pymoo.indicators.hv import Hypervolume
def convert_minimization(Y, obj_type=None):
'''
Convert maximization to minimization.
Example usage:
Y = np.array([[1, 4, 3], [2, 1, 4], [3, 2, 2]])
obj_type = ['min', 'max', 'min']
Y_minimized = convert_minimization(Y, obj_type)
'''
if obj_type is None:
return Y
if isinstance(obj_type, str):
obj_type = [obj_type] * Y.shape[1]
assert isinstance(obj_type, Iterable), f'Objective type {type(obj_type)} is not supported'
maxm_idx = np.array(obj_type) == 'max'
Y = Y.copy()
Y[:, maxm_idx] = -Y[:, maxm_idx]
return Y
def find_pareto_front(Y, return_index=False, obj_type=None, eps=1e-8):
'''
Find pareto front (undominated part) of the input performance data.
'''
if len(Y) == 0: return np.array([])
Y = convert_minimization(Y, obj_type)
sorted_indices = np.argsort(Y.T[0])
pareto_indices = []
for idx in sorted_indices:
# check domination relationship
if not (np.logical_and((Y[idx] - Y > -eps).all(axis=1), (Y[idx] - Y > eps).any(axis=1))).any():
pareto_indices.append(idx)
pareto_front = np.atleast_2d(Y[pareto_indices].copy())
if return_index:
return pareto_front, pareto_indices
else:
return pareto_front
def check_pareto(Y, obj_type=None):
'''
Check pareto optimality of the input performance data
Example usage:
Y = np.array([[1, 2], [2, 1], [1.5, 1.5]])
pareto_optimal = check_pareto(Y)
'''
Y = convert_minimization(Y, obj_type)
# find pareto indices
sorted_indices = np.argsort(Y.T[0])
pareto = np.zeros(len(Y), dtype=bool)
for idx in sorted_indices:
# check domination relationship
if not (np.logical_and((Y <= Y[idx]).all(axis=1), (Y < Y[idx]).any(axis=1))).any():
pareto[idx] = True
return pareto
def calc_hypervolume(Y, ref_point, obj_type=None):
'''
Calculate hypervolume
Example usage:
Y = np.array([[1, 2], [2, 1], [1.5, 1.5]])
ref_point = np.array([2.5, 2.5])
hypervolume = calc_hypervolume(Y, ref_point)
'''
Y = convert_minimization(Y, obj_type)
return Hypervolume(ref_point=ref_point).do(Y)
def calc_pred_error(Y, Y_pred_mean, average=False):
'''
Calculate prediction error
'''
assert len(Y.shape) == len(Y_pred_mean.shape) == 2
pred_error = np.abs(Y - Y_pred_mean)
if average:
pred_error = np.sum(pred_error, axis=0) / len(Y)
return pred_error
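The epsilon-tolerant domination test inside `find_pareto_front` can be exercised standalone. `pareto_mask` below is a hypothetical, self-contained re-statement of the same rule, kept here as a worked example:

```python
import numpy as np

def pareto_mask(Y, eps=1e-8):
    # Keep a point iff no other point weakly dominates it (within eps).
    n = len(Y)
    mask = np.ones(n, dtype=bool)
    for i in range(n):
        dominated = np.logical_and(
            (Y - Y[i] < eps).all(axis=1),   # other <= this (within eps)
            (Y[i] - Y > eps).any(axis=1),   # other strictly better somewhere
        )
        mask[i] = not dominated.any()
    return mask

Y = np.array([[1.0, 2.0], [2.0, 1.0], [1.5, 1.5], [2.0, 2.0]])
print(pareto_mask(Y))  # [ True  True  True False]
```

Only (2, 2) is dominated (by (1.5, 1.5)); the other three points trade off the two objectives and survive.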
================================================
FILE: transopt/utils/path.py
================================================
import os
from pathlib import Path
def get_library_path():
home = Path.home()
library_dir_name = "transopt_files"
library_path = home / library_dir_name
if not library_path.exists():
library_path.mkdir(parents=True, exist_ok=True)
return library_path
def get_absolut_path():
lib_path = get_library_path()
absolut_dir_name = "Absolut"
absolut_path = lib_path / absolut_dir_name
if not absolut_path.exists():
absolut_path.mkdir(parents=True, exist_ok=True)
return absolut_path
def get_log_file_path():
lib_path = get_library_path()
log_filename = "runtime.log"
return lib_path / log_filename
================================================
FILE: transopt/utils/plot.py
================================================
import matplotlib.pyplot as plt
from matplotlib import cm
def plot2D(X, Y, c='black', ls='', marker='o', fillstyle=None, label=None, ax=None, file=None, show=False,
show_legend=False, bounds=None,title=None,disconnect=None):
if ax is None:
_, ax = plt.subplots(1, 1)
if disconnect is None:
ax.plot(X, Y, c=c, ls=ls, marker=marker, label=label,fillstyle=fillstyle)
else:
for l in disconnect:
ax.plot(X[l], Y[l], c=c, ls=ls, marker=marker, label=label, fillstyle=fillstyle)
    ax.set_xlabel(r'$f_1(\mathbf{x})$', fontsize=13)
    ax.set_ylabel(r'$f_2(\mathbf{x})$', fontsize=13)
ax.tick_params(axis='both', labelsize=13)
if show or file is not None:
plt.grid()
if show_legend:
plt.legend()
if bounds is not None:
plt.xlim((bounds[0, 0], bounds[0, 1]))
plt.ylim((bounds[1, 0], bounds[1, 1]))
if title:
plt.title(title)
if file is not None:
plt.savefig(file, format='pdf')
if show:
plt.show()
if file is None and show is False:
return ax
return None
def plot3D(X, Y, Z, c='black', ls='', marker='o', fillstyle=None, label=None, ax=None, file=None, show=False,
show_legend=False, bounds=None,title=None):
if ax is None:
_, ax = plt.subplots(subplot_kw={"projection": "3d"})
ax.plot(X, Y, Z, c=c, ls=ls, marker=marker, label=label,fillstyle=fillstyle)
    ax.set_xlabel(r'$f_1(\mathbf{x})$', fontsize=13)
    ax.set_ylabel(r'$f_2(\mathbf{x})$', fontsize=13)
    ax.set_zlabel(r'$f_3(\mathbf{x})$', fontsize=13)
ax.tick_params(axis='both', labelsize=13)
if show or file is not None:
plt.grid()
if show_legend:
plt.legend()
if bounds is not None:
ax.set_xlim((bounds[0, 0], bounds[0, 1]))
ax.set_ylim((bounds[1, 0], bounds[1, 1]))
ax.set_zlim((bounds[2, 0], bounds[2, 1]))
if title:
plt.title(title)
if file is not None:
plt.savefig(file, format='pdf')
if show:
plt.show()
if file is None and show is False:
return ax
return None
def surface3D(X_grid, Y_grid, cmap=cm.Blues, ax=None, file=None, show=False,label=None):
if ax is None:
_, ax = plt.subplots(subplot_kw={"projection": "3d"})
ax.set_xlabel('$x_1$', fontsize=13)
ax.set_ylabel('$x_2$', fontsize=13)
    ax.set_zlabel(r'$f(\mathbf{x})$', fontsize=13)
ax.plot_surface(X_grid, X_grid.T, Y_grid, cmap=cmap,label=label)
if file is not None:
plt.grid()
plt.savefig(file, format='pdf')
if show:
plt.grid()
plt.show()
if file is None and show is False:
return ax
return None
================================================
FILE: transopt/utils/profile.py
================================================
import cProfile
import functools
def profile_function(filename=None):
def profiler_decorator(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
profiler = cProfile.Profile()
profiler.enable()
# Execute the function
result = func(*args, **kwargs)
profiler.disable()
# Save stats to a file
profiler.dump_stats(filename if filename else func.__name__ + '_profile.prof')
return result
return wrapper
return profiler_decorator
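A variant of `profile_function` that keeps the stats in memory instead of dumping to a `.prof` file can be useful in tests. `profile_to_stream` and `work` below are hypothetical names used only for this sketch:

```python
import cProfile
import functools
import io
import pstats

def profile_to_stream(func):
    # Like profile_function, but stores formatted stats on the wrapper
    # instead of writing a .prof file to disk.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        pr = cProfile.Profile()
        pr.enable()
        result = func(*args, **kwargs)
        pr.disable()
        s = io.StringIO()
        pstats.Stats(pr, stream=s).sort_stats("cumulative").print_stats(5)
        wrapper.stats = s.getvalue()
        return result
    return wrapper

@profile_to_stream
def work(n):
    return sum(i * i for i in range(n))

print(work(1000))  # 332833500
```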
================================================
FILE: transopt/utils/rng_helper.py
================================================
"""
This file includes code adapted from HPOBench (https://github.com/automl/HPOBench),
which is licensed under the Apache License 2.0. A copy of the license can be
found at http://www.apache.org/licenses/LICENSE-2.0.
"""
""" Helper functions to easily obtain randomState """
from typing import Union, Tuple, List
import numpy as np
def get_rng(rng: Union[int, np.random.RandomState, None] = None,
self_rng: Union[int, np.random.RandomState, None] = None) -> np.random.RandomState:
"""
Helper function to obtain RandomState from int or create a new one.
Sometimes a default random state (self_rng) is already available, but a
new random state is desired. In this case ``rng`` is not None and not already
a random state (int or None) -> a new random state is created.
If ``rng`` is already a randomState, it is just returned.
Same if ``rng`` is None, but the default rng is given.
Parameters
----------
rng : int, np.random.RandomState, None
self_rng : np.random.RandomState, None
Returns
-------
np.random.RandomState
"""
if rng is not None:
return _cast_int_to_random_state(rng)
if rng is None and self_rng is not None:
return _cast_int_to_random_state(self_rng)
return np.random.RandomState()
def _cast_int_to_random_state(rng: Union[int, np.random.RandomState]) -> np.random.RandomState:
"""
Helper function to cast ``rng`` from int to np.random.RandomState if necessary.
Parameters
----------
rng : int, np.random.RandomState
Returns
-------
np.random.RandomState
"""
if isinstance(rng, np.random.RandomState):
return rng
if int(rng) == rng:
# As seed is sometimes -1 (e.g. if SMAC optimizes a deterministic function) -> use abs()
return np.random.RandomState(np.abs(rng))
raise ValueError(f"{rng} is neither a number nor a RandomState. Initializing RandomState failed")
def serialize_random_state(random_state: np.random.RandomState) -> Tuple[int, List, int, int, int]:
(rnd0, rnd1, rnd2, rnd3, rnd4) = random_state.get_state()
rnd1 = rnd1.tolist()
return rnd0, rnd1, rnd2, rnd3, rnd4
def deserialize_random_state(random_state: Tuple[int, List, int, int, int]) -> np.random.RandomState:
(rnd0, rnd1, rnd2, rnd3, rnd4) = random_state
rnd1 = [np.uint32(number) for number in rnd1]
random_state = np.random.RandomState()
random_state.set_state((rnd0, rnd1, rnd2, rnd3, rnd4))
return random_state
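The serialize/deserialize pair above reduces to a tuple-to-list-and-back conversion of `RandomState.get_state()`. A self-contained round-trip sketch (inlining the same steps rather than calling the helpers) shows that the restored generator reproduces the original stream:

```python
import numpy as np

# Round-trip: a RandomState's state survives the tuple -> list -> tuple
# conversion used by (de)serialize_random_state above.
rs = np.random.RandomState(42)
name, keys, pos, has_gauss, cached = rs.get_state()
payload = (name, keys.tolist(), pos, has_gauss, cached)  # JSON-friendly

restored = np.random.RandomState()
restored.set_state((payload[0],
                    [np.uint32(k) for k in payload[1]],
                    payload[2], payload[3], payload[4]))
print(rs.rand() == restored.rand())  # True: identical streams
```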
================================================
FILE: transopt/utils/serialization.py
================================================
import numpy as np
from abc import abstractmethod, ABC
from dataclasses import dataclass
from typing import Dict, Hashable, Tuple, List
@dataclass
class InputData:
X: np.ndarray
@dataclass
class TaskData:
X: np.ndarray
Y: np.ndarray
# def vectors_to_ndarray(keys_order, X: List[Dict]) -> np.ndarray:
# """Convert a list of input_vectors to a ndarray."""
# # Converting dictionaries to lists using the order from keys_order
# data = [[vec[key] for key in keys_order] for vec in X]
# # Converting lists to ndarray
# ndarray = np.array(data)
# return ndarray
# def ndarray_to_vectors(keys_order, ndarray: np.ndarray) -> List[Dict]:
# """Convert a ndarray to a list of dictionaries."""
# # Converting ndarray to lists of values
# data = ndarray.tolist()
# # Converting lists of values to dictionaries using keys from keys_order
# input_vectors = [{key: value for key, value in zip(keys_order, row)} for row in data]
# return input_vectors
def output_to_ndarray(Y: List[Dict]) -> np.ndarray:
"""Extract function_value from each output and convert to ndarray."""
# Extracting function_value from each dictionary in the list
function_values = [[y for name, y in item.items()] for item in Y]
# Converting list to ndarray
ndarray = np.array(function_values)
return ndarray
def multioutput_to_ndarray(output_value: List[Dict], num_output:int) -> np.ndarray:
"""Extract function_value from each output and convert to ndarray."""
# Extracting function_value from each dictionary in the list
function_values = []
for i in range(1, num_output+1):
function_values.append([item[f'function_value_{i}'] for item in output_value])
# Converting list to ndarray
ndarray = np.array(function_values)
return ndarray
def convert_np_to_bulidin(obj):
if isinstance(obj, np.integer):
return int(obj)
elif isinstance(obj, np.floating):
return float(obj)
elif isinstance(obj, np.ndarray):
return obj.tolist()
elif isinstance(obj, dict):
return {key: convert_np_to_bulidin(value) for key, value in obj.items()}
elif isinstance(obj, list):
return [convert_np_to_bulidin(item) for item in obj]
else:
return obj
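The recursive numpy-to-builtin conversion above is what makes results JSON-serializable. `to_builtin` below is a hypothetical, self-contained restatement of the same logic, shown feeding directly into `json.dumps`:

```python
import json

import numpy as np

def to_builtin(obj):
    # Recursively convert numpy scalars/arrays into JSON-safe builtins.
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, dict):
        return {k: to_builtin(v) for k, v in obj.items()}
    if isinstance(obj, list):
        return [to_builtin(v) for v in obj]
    return obj

out = to_builtin({"x": np.int64(3), "y": np.array([1.0, 2.0])})
print(json.dumps(out))  # {"x": 3, "y": [1.0, 2.0]}
```

Without the conversion, `json.dumps` raises a TypeError on `np.int64` and `np.ndarray` values.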
================================================
FILE: transopt/utils/sk.py
================================================
#!/usr/bin/env python3
# vim: sta:et:sw=2:ts=2:sts=2 :
from copy import deepcopy as kopy
import sys,random
"""
Scott-Knot test + non parametric effect size + significance tests.
Tim Menzies, 2019. Share and enjoy. No warranty. Caveat Emptor.
Accepts data as per the following exmaple (you can ignore the "*n"
stuff, that is just there for the purposes of demos on larger
and larger data)
Ouputs treatments, clustered such that things that have similar
results get the same ranks.
For a demo of this code, just run
python3 sk.py
"""
#-----------------------------------------------------
# Examples
def skDemo(n=5) :
#Rx.data is one way to run the code
return Rx.data( x1 =[ 0.12, 0.21 ,0.51, 0.7]*n,
x2 =[0.6 ,0.7 , 0.8 , 0.89]*n,
x3 =[0.13 ,0.23, 0.38 , 0.38]*n,
x4 =[0.6 ,0.7, 0.8 , 0.9]*n,
x5 =[0.1 ,0.2, 0.3 , 0.4]*n)
"""
Another is to make a file
x1 0.34 0.49 0.51 0.6
x2 0.6 0.7 0.8 0.9
x3 0.15 0.25 0.4 0.35
x4 0.6 0.7 0.8 0.9
x5 0.1 0.2 0.3 0.4
Then call
Rx.fileIn( fileName )
"""
#-----------------------------------------------------
# Config
class o:
def __init__(i,**d) : i.__dict__.update(**d)
class THE:
cliffs = o(dull= [0.147, # small
0.33, # medium
0.474 # large
][0])
bs= o( conf=0.05,
b=500)
mine = o( private="_")
char = o( skip="?")
rx = o( show="%4s %10s %s")
tile = o( width=50,
chops=[0.1 ,0.3,0.5,0.7,0.9],
marks=[" " ,"-","-","-"," "],
bar="|",
star="*",
show=" %5.3f")
#-----------------------------------------------------
def cliffsDeltaSlow(lst1,lst2, dull = THE.cliffs.dull):
"""Returns true if there are more than 'dull' difference.
Warning: O(N)^2."""
n= gt = lt = 0.0
for x in lst1:
for y in lst2:
n += 1
if x > y: gt += 1
if x < y: lt += 1
return abs(lt - gt)/n <= dull
def cliffsDelta(lst1, lst2, dull=THE.cliffs.dull):
"By pre-soring the lists, this cliffsDelta runs in NlogN time"
def runs(lst):
for j,two in enumerate(lst):
if j == 0: one,i = two,0
if one!=two:
yield j - i,one
i = j
one=two
yield j - i + 1,two
#---------------------
m, n = len(lst1), len(lst2)
lst2 = sorted(lst2)
j = more = less = 0
for repeats,x in runs(sorted(lst1)):
while j <= (n - 1) and lst2[j] < x: j += 1
more += j*repeats
while j <= (n - 1) and lst2[j] == x: j += 1
less += (n - j)*repeats
d= (more - less) / (m*n)
return abs(d) <= dull
def bootstrap(y0,z0,conf=THE.bs.conf,b=THE.bs.b):
"""
  two lists y0,z0 are the same if the same patterns can be seen in all of
  them, as well as in 100s to 1000s of sub-samples from each.
  From pages 220 to 223 of the Efron text 'An Introduction to the Bootstrap'.
  Typically, conf=0.05 and b is in the 100s to 1000s.
"""
class Sum():
def __init__(i,some=[]):
i.sum = i.n = i.mu = 0 ; i.all=[]
for one in some: i.put(one)
def put(i,x):
i.all.append(x);
i.sum +=x; i.n += 1; i.mu = float(i.sum)/i.n
def __add__(i1,i2): return Sum(i1.all + i2.all)
def testStatistic(y,z):
tmp1 = tmp2 = 0
for y1 in y.all: tmp1 += (y1 - y.mu)**2
for z1 in z.all: tmp2 += (z1 - z.mu)**2
s1 = float(tmp1)/(y.n - 0.9)
s2 = float(tmp2)/(z.n - 0.9)
delta = z.mu - y.mu
if s1+s2:
delta = delta/((s1/y.n + s2/z.n)**0.5)
return delta
def one(lst): return lst[ int(any(len(lst))) ]
def any(n) : return random.uniform(0,n)
y,z = Sum(y0), Sum(z0)
x = y + z
baseline = testStatistic(y,z)
yhat = [y1 - y.mu + x.mu for y1 in y.all]
zhat = [z1 - z.mu + x.mu for z1 in z.all]
bigger = 0
for i in range(b):
if testStatistic(Sum([one(yhat) for _ in yhat]),
Sum([one(zhat) for _ in zhat])) > baseline:
bigger += 1
return bigger / b >= conf
#-------------------------------------------------------
# misc functions
def same(x): return x
class Mine:
"class that, amongst other times, pretty prints objects"
oid = 0
def identify(i):
Mine.oid += 1
i.oid = Mine.oid
return i.oid
def __repr__(i):
pairs = sorted([(k, v) for k, v in i.__dict__.items()
if k[0] != THE.mine.private])
pre = i.__class__.__name__ + '{'
def q(z):
if isinstance(z,str): return "'%s'" % z
if callable(z): return "fun(%s)" % z.__name__
return str(z)
return pre + ", ".join(['%s=%s' % (k, q(v))])
#-------------------------------------------------------
class Rx(Mine):
"place to manage pairs of (TreatmentName,ListofResults)"
def __init__(i, rx="",vals=[], key=same):
i.rx = rx
i.vals = sorted([x for x in vals if x != THE.char.skip])
i.n = len(i.vals)
i.med = i.vals[int(i.n/2)]
i.mu = sum(i.vals)/i.n
i.rank = 1
def tiles(i,lo=0,hi=1): return xtile(i.vals,lo,hi)
def __lt__(i,j): return i.med < j.med
def __eq__(i,j):
return cliffsDelta(i.vals,j.vals) and \
bootstrap(i.vals,j.vals)
def __repr__(i):
return '%4s %10s %s' % (i.rank, i.rx, i.tiles())
def xpect(i,j,b4):
"Expected value of difference in emans before and after a split"
n = i.n + j.n
return i.n/n * (b4.med- i.med)**2 + j.n/n * (j.med-b4.med)**2
#-- end instance methods --------------------------
@staticmethod
def data(**d):
"convert dictionary to list of treatments"
return [Rx(k,v) for k,v in d.items()]
@staticmethod
def fileIn(f):
d={}
what=None
for word in words(f):
x = thing(word)
if isinstance(x,str):
what=x
d[what] = d.get(what,[])
else:
d[what] += [x]
# print('---------------')
# print(Rx.data(**d))
Rx.write(Rx.sk(Rx.data(**d)))
@staticmethod
def sum(rxs):
"make a new rx from all the rxs' vals"
all = []
for rx in rxs:
for val in rx.vals:
all += [val]
return Rx(vals=all)
@staticmethod
def show(rxs):
"pretty print set of treatments"
tmp=Rx.sum(rxs)
lo,hi=tmp.vals[0], tmp.vals[-1]
for rx in sorted(rxs):
print(THE.rx.show % (rx.rank, rx.rx, rx.tiles()))
@staticmethod
def write(rxs):
"pretty write set of treatments"
tmp=Rx.sum(rxs)
lo,hi=tmp.vals[0], tmp.vals[-1]
with open('./scott_knot.txt', 'a') as write_f:
for rx in sorted(rxs):
write_f.write(THE.rx.show % (rx.rank, rx.rx, rx.tiles()) + '\r\n')
@staticmethod
def sk(rxs):
"sort treatments and rank them"
def divide(lo,hi,b4,rank):
cut = left=right=None
best = 0
for j in range(lo+1,hi):
left0 = Rx.sum( rxs[lo:j] )
right0 = Rx.sum( rxs[j:hi] )
now = left0.xpect(right0, b4)
if now > best:
if left0 != right0:
best, cut,left,right = now,j,kopy(left0),kopy(right0)
if cut:
rank = divide(lo, cut, left, rank) + 1
rank = divide(cut ,hi, right,rank)
else:
for rx in rxs[lo:hi]:
rx.rank = rank
return rank
#-- sk main
rxs=sorted(rxs)
divide(0, len(rxs),Rx.sum(rxs),1)
return rxs
#-------------------------------------------------------
def pairs(lst):
"Return all pairs of items i,i+1 from a list."
last=lst[0]
for i in lst[1:]:
yield last,i
last = i
def words(f):
with open(f) as fp:
for line in fp:
for word in line.split():
yield word
def xtile(lst,lo,hi,
width= THE.tile.width,
chops= THE.tile.chops,
marks= THE.tile.marks,
bar= THE.tile.bar,
star= THE.tile.star,
show= THE.tile.show):
"""The function _xtile_ takes a list of (possibly)
unsorted numbers and presents them as a horizontal
xtile chart (in ascii format). The default is a
contracted _quintile_ that shows the
10,30,50,70,90 breaks in the data (but this can be
  changed; see the optional flags of the function).
"""
def pos(p) : return ordered[int(len(lst)*p)]
def place(x) :
return int(width*float((x - lo))/(hi - lo+0.00001))
def pretty(lst) :
return ', '.join([show % x for x in lst])
ordered = sorted(lst)
lo = min(lo,ordered[0])
hi = max(hi,ordered[-1])
what = [pos(p) for p in chops]
where = [place(n) for n in what]
out = [" "] * width
for one,two in pairs(where):
for i in range(one,two):
out[i] = marks[0]
marks = marks[1:]
out[int(width/2)] = bar
out[place(pos(0.5))] = star
return '('+''.join(out) + ")," + pretty(what)
def thing(x):
"Numbers become numbers; every other x is a symbol."
try: return int(x)
except ValueError:
try: return float(x)
except ValueError:
return x
#-------------------------------------------------------
def _cliffsDelta():
"demo function"
lst1=[1,2,3,4,5,6,7]*100
n=1
for _ in range(10):
lst2=[x*n for x in lst1]
print(cliffsDelta(lst1,lst2),n) # should return False
n*=1.03
def bsTest(n=1000,mu1=10,sigma1=1,mu2=10.2,sigma2=1):
def g(mu,sigma) : return random.gauss(mu,sigma)
x = [g(mu1,sigma1) for i in range(n)]
y = [g(mu2,sigma2) for i in range(n)]
return n,mu1,sigma1,mu2,sigma2,\
'same' if bootstrap(x,y) else 'different'
#-------------------------------------------------------
if __name__ == "__main__":
# random.seed(1)
b = [[2], [1, 4, 5, 7,11]]
a = Rx.data(x1=[1], x2=[2,3,4],x3=[3])
Rx.show(Rx.sk(a))
================================================
FILE: transopt/utils/weights.py
================================================
import math
import numpy as np
def _set_weight(w, c, v, unit, s, n_obj, dim):
if dim == n_obj:
v = np.zeros(shape=(n_obj, 1))
if dim == 1:
c = c + 1
v[0] = unit - s
w[:, c - 1] = v[:, 0]
return w, c
for i in range(unit - s + 1):
v[dim - 1] = i
w, c = _set_weight(w, c, v, unit, s + i, n_obj, dim - 1)
return w, c
def _no_weight(unit, s, dim):
m = 0
if dim == 1:
m = 1
return m
for i in range(unit - s + 1):
m = m + _no_weight(unit, s + i, dim - 1)
return m
def init_weight(n_obj, n_sample):
if n_obj == 1:
return np.expand_dims(np.linspace(0,1,n_sample),-1)
u = math.floor(math.pow(n_sample, 1.0 / (n_obj - 1))) - 2
m = 0
while m < n_sample:
u = u + 1
m = _no_weight(u, 0, n_obj)
if m != n_sample:
        print(f'Warning: requested {n_sample} weights but generated {m}!')
w = np.zeros(shape=(n_obj, m))
c = 0
v = np.zeros(shape=(n_obj, 1))
w, c = _set_weight(w, c, v, u, 0, n_obj, n_obj)
w = w / (u + 0.0)
return w.T
def tchebycheff(X, W, ideal=None, normalize=False):
"""
:param X: data points np array with (1, n_var) or (n_sample, n_var)
:param W: weights np array with (1, n_var) or (n_sample, n_var)
:param ideal:
:param normalize:
:param return_index:
:return: np array with (n_sample, )
"""
X = np.atleast_2d(X)
W = np.atleast_2d(W)
n_sample = X.shape[0]
n_weight = W.shape[0]
if n_sample == 1 and n_weight != 1:
X = np.tile(X, (n_weight, 1))
if n_weight == 1 and n_sample != 1:
W = np.tile(W, (n_sample, 1))
if ideal is None:
ideal = np.zeros((1, X.shape[1]))
if normalize:
norm_x = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0)) - ideal
else:
norm_x = X - ideal
return np.expand_dims(np.max(norm_x * W, axis=1), -1)
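The two halves of this module work together: `init_weight` enumerates weight vectors on a simplex lattice, and `tchebycheff` scalarizes objective values against them. A self-contained sketch of the same ideas (not this module's API; it assumes a Das-Dennis style lattice with `H` divisions):

```python
import itertools
import numpy as np

def simplex_lattice(n_obj, H):
    # All weight vectors whose components are multiples of 1/H and sum to 1.
    return np.array([np.array(c) / H
                     for c in itertools.product(range(H + 1), repeat=n_obj)
                     if sum(c) == H])

def tcheby(F, w, ideal):
    # Tchebycheff scalarization: the worst weighted deviation from the ideal point.
    return np.max(w * (np.atleast_2d(F) - ideal), axis=1)
```

For `n_obj` objectives and `H` divisions the lattice has C(H + n_obj - 1, n_obj - 1) points, which is why `init_weight` searches for the smallest `u` whose lattice size reaches `n_sample`.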
================================================
FILE: webui/.gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
node_modules/
================================================
FILE: webui/LICENSE.md
================================================
MIT License
Copyright (c) 2022 Dashwind - Admin Dashboard Template
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: webui/package.json
================================================
{
"name": "admin-dashboard-template-dashwind",
"version": "1.0.0",
"description": "Admin Dashboard template built with create-react-app, tailwind css and daisy UI. Template uses rich tailwind css utility classes and have components of daisy UI, also have redux toolkit implemented for store management.",
"scripts": {
"start": "react-scripts start",
"build": "react-scripts build",
"test": "react-scripts test",
"eject": "react-scripts eject"
},
"dependencies": {
"@chatui/core": "^2.4.2",
"@heroicons/react": "^2.0.13",
"@reduxjs/toolkit": "^1.9.0",
"@testing-library/jest-dom": "^5.16.5",
"@testing-library/react": "^13.4.0",
"@testing-library/user-event": "^13.5.0",
"antd": "^5.20.5",
"axios": "^1.1.3",
"bizcharts": "^4.1.23",
"capitalize-the-first-letter": "^1.0.8",
"chart.js": "^4.0.1",
"dayjs": "^1.11.7",
"echarts": "^5.5.1",
"echarts-for-react": "^3.0.2",
"moment": "^2.29.4",
"react": "^18.3.1",
"react-chartjs-2": "^5.0.1",
"react-dom": "^18.3.1",
"react-notifications": "^1.7.4",
"react-redux": "^8.0.5",
"react-router-dom": "^6.4.3",
"react-scripts": "^5.0.1",
"react-tailwindcss-datepicker": "^1.6.0",
"reactstrap": "^9.2.2",
"theme-change": "^2.2.0",
"web-vitals": "^2.1.4"
},
"repository": {
"type": "git",
"url": "git+https://github.com/srobbin01/tailwind-dashboard-template-dashwind"
},
"keywords": [
"reactjs",
"tailwind-css",
"starter-kit",
"saas-starter-kit",
"reduxt-toolkit-dashboard-template",
"daisyui-template",
"dashboard-template",
"react-router",
"react-charts"
],
"author": "srobbin01",
"license": "ISC",
"bugs": {
"url": "https://github.com/srobbin01/tailwind-dashboard-template-dashwind/issues"
},
"homepage": "",
"eslintConfig": {
"extends": [
"react-app",
"react-app/jest"
]
},
"browserslist": {
"production": [
">0.2%",
"not dead",
"not op_mini all"
],
"development": [
"last 1 chrome version",
"last 1 firefox version",
"last 1 safari version"
]
},
"devDependencies": {
"@tailwindcss/typography": "^0.5.8",
"autoprefixer": "^10.4.13",
"daisyui": "^4.4.19",
"postcss": "^8.4.19",
"tailwindcss": "^3.3.6"
}
}
================================================
FILE: webui/public/index.html
================================================
TransOPT
================================================
FILE: webui/public/manifest.json
================================================
{
"short_name": "TransOPT",
"name": "TransOPT",
"icons": [
{
"src": "transopt.png",
"sizes": "64x64 32x32 24x24 16x16",
"type": "image/png"
},
{
"src": "transopt.png",
"type": "image/png",
"sizes": "192x192"
},
{
"src": "transopt.png",
"type": "image/png",
"sizes": "512x512"
}
],
"start_url": ".",
"display": "standalone",
"theme_color": "#000000",
"background_color": "#ffffff"
}
================================================
FILE: webui/public/robots.txt
================================================
# https://www.robotstxt.org/robotstxt.html
User-agent: *
Disallow:
================================================
FILE: webui/src/App.css
================================================
.App {
text-align: center;
}
.App-logo {
height: 40vmin;
pointer-events: none;
}
@media (prefers-reduced-motion: no-preference) {
.App-logo {
animation: App-logo-spin infinite 20s linear;
}
}
.App-header {
background-color: #282c34;
min-height: 100vh;
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
font-size: calc(10px + 2vmin);
color: white;
}
.App-link {
color: #61dafb;
}
@keyframes App-logo-spin {
from {
transform: rotate(0deg);
}
to {
transform: rotate(360deg);
}
}
================================================
FILE: webui/src/App.js
================================================
import React, { lazy, useEffect } from 'react'
import './App.css';
import { BrowserRouter as Router, Route, Routes, Navigate } from 'react-router-dom'
import { themeChange } from 'theme-change'
import checkAuth from './app/auth';
import initializeApp from './app/init';
// Importing pages
const Layout = lazy(() => import('./containers/Layout'))
// Initializing different libraries
initializeApp()
// Check for login and initialize axios
const token = checkAuth()
function App() {
useEffect(() => {
// 👆 daisy UI themes initialization
themeChange(false)
}, [])
  return (
    <Router>
      <Routes>
        <Route path="/app/*" element={<Layout />} />
        <Route path="*" element={<Navigate to="/app" replace />} />
      </Routes>
    </Router>
  )
}
export default App
================================================
FILE: webui/src/App.test.js
================================================
import { render, screen } from '@testing-library/react';
import App from './App';
test('renders learn react link', () => {
  render(<App />);
const linkElement = screen.getByText(/learn react/i);
expect(linkElement).toBeInTheDocument();
});
================================================
FILE: webui/src/app/auth.js
================================================
import axios from "axios"
const checkAuth = () => {
/* Getting token value stored in localstorage, if token is not present we will open login page
for all internal dashboard routes */
const TOKEN = localStorage.getItem("token")
const PUBLIC_ROUTES = ["login", "forgot-password", "register", "documentation"]
const isPublicPage = PUBLIC_ROUTES.some( r => window.location.href.includes(r))
if(!TOKEN && !isPublicPage){
window.location.href = '/login'
return;
}else{
axios.defaults.headers.common['Authorization'] = `Bearer ${TOKEN}`
axios.interceptors.request.use(function (config) {
// UPDATE: Add this code to show global loading indicator
document.body.classList.add('loading-indicator');
return config
}, function (error) {
return Promise.reject(error);
});
axios.interceptors.response.use(function (response) {
// UPDATE: Add this code to hide global loading indicator
document.body.classList.remove('loading-indicator');
return response;
}, function (error) {
document.body.classList.remove('loading-indicator');
return Promise.reject(error);
});
return TOKEN
}
}
export default checkAuth
================================================
FILE: webui/src/app/init.js
================================================
import axios from "axios"
const initializeApp = () => {
// Setting base URL for all API request via axios
axios.defaults.baseURL = process.env.REACT_APP_BASE_URL
if (!process.env.NODE_ENV || process.env.NODE_ENV === 'development') {
// dev code
} else {
// Prod build code
// Removing console.log from prod
console.log = () => {};
// init analytics here
}
}
export default initializeApp
================================================
FILE: webui/src/app/store.js
================================================
import { configureStore } from '@reduxjs/toolkit'
import headerSlice from '../features/common/headerSlice'
import modalSlice from '../features/common/modalSlice'
import rightDrawerSlice from '../features/common/rightDrawerSlice'
import leadsSlice from '../features/leads/leadSlice'
const combinedReducer = {
header : headerSlice,
rightDrawer : rightDrawerSlice,
modal : modalSlice,
lead : leadsSlice
}
export default configureStore({
reducer: combinedReducer
})
================================================
FILE: webui/src/components/CalendarView/index.js
================================================
import { useEffect, useState } from "react";
import ChevronLeftIcon from "@heroicons/react/24/solid/ChevronLeftIcon";
import ChevronRightIcon from "@heroicons/react/24/solid/ChevronRightIcon";
import moment from "moment";
import { CALENDAR_EVENT_STYLE } from "./util";
const THEME_BG = CALENDAR_EVENT_STYLE
function CalendarView({calendarEvents, addNewEvent, openDayDetail}){
const today = moment().startOf('day')
const weekdays = ["sun", "mon", "tue", "wed", "thu", "fri", "sat"];
const colStartClasses = [
"",
"col-start-2",
"col-start-3",
"col-start-4",
"col-start-5",
"col-start-6",
"col-start-7",
];
const [firstDayOfMonth, setFirstDayOfMonth] = useState(moment().startOf('month'))
const [events, setEvents] = useState([])
    const [currMonth, setCurrMonth] = useState(() => moment(today).format("MMM-YYYY"));
useEffect(() => {
setEvents(calendarEvents)
}, [calendarEvents])
const allDaysInMonth = ()=> {
let start = moment(firstDayOfMonth).startOf('week')
let end = moment(moment(firstDayOfMonth).endOf('month')).endOf('week')
var days = [];
var day = start;
while (day <= end) {
days.push(day.toDate());
day = day.clone().add(1, 'd');
}
return days
}
const getEventsForCurrentDate = (date) => {
let filteredEvents = events.filter((e) => {return moment(date).isSame(moment(e.startTime), 'day') } )
if(filteredEvents.length > 2){
let originalLength = filteredEvents.length
filteredEvents = filteredEvents.slice(0, 2)
filteredEvents.push({title : `${originalLength - 2} more`, theme : "MORE"})
}
return filteredEvents
}
const openAllEventsDetail = (date, theme) => {
if(theme != "MORE")return 1
let filteredEvents = events.filter((e) => {return moment(date).isSame(moment(e.startTime), 'day') } ).map((e) => {return {title : e.title, theme : e.theme}})
openDayDetail({filteredEvents, title : moment(date).format("D MMM YYYY")})
}
const isToday = (date) => {
return moment(date).isSame(moment(), 'day');
}
const isDifferentMonth = (date) => {
return moment(date).month() != moment(firstDayOfMonth).month()
}
const getPrevMonth = (event) => {
const firstDayOfPrevMonth = moment(firstDayOfMonth).add(-1, 'M').startOf('month');
setFirstDayOfMonth(firstDayOfPrevMonth)
setCurrMonth(moment(firstDayOfPrevMonth).format("MMM-yyyy"));
};
const getCurrentMonth = (event) => {
const firstDayOfCurrMonth = moment().startOf('month');
setFirstDayOfMonth(firstDayOfCurrMonth)
setCurrMonth(moment(firstDayOfCurrMonth).format("MMM-yyyy"));
};
const getNextMonth = (event) => {
const firstDayOfNextMonth = moment(firstDayOfMonth).add(1, 'M').startOf('month');
setFirstDayOfMonth(firstDayOfNextMonth)
setCurrMonth(moment(firstDayOfNextMonth).format("MMM-yyyy"));
};
    return(
        <>
            {/* month navigation header and calendar grid markup omitted */}
        </>
    )
}
export default CalendarView
================================================
FILE: webui/src/containers/Header.js
================================================
import { themeChange } from 'theme-change'
import React, { useEffect, useState } from 'react'
import { useSelector, useDispatch } from 'react-redux'
import BellIcon from '@heroicons/react/24/outline/BellIcon'
import Bars3Icon from '@heroicons/react/24/outline/Bars3Icon'
import MoonIcon from '@heroicons/react/24/outline/MoonIcon'
import SunIcon from '@heroicons/react/24/outline/SunIcon'
import { openRightDrawer } from '../features/common/rightDrawerSlice';
import { RIGHT_DRAWER_TYPES } from '../utils/globalConstantUtil'
import { NavLink, Routes, Link , useLocation} from 'react-router-dom'
function Header(){
const dispatch = useDispatch()
const {noOfNotifications, pageTitle} = useSelector(state => state.header)
const [currentTheme, setCurrentTheme] = useState(localStorage.getItem("theme"))
useEffect(() => {
themeChange(false)
if(currentTheme === null){
if (window.matchMedia && window.matchMedia('(prefers-color-scheme: dark)').matches ) {
setCurrentTheme("dark")
}else{
setCurrentTheme("light")
}
}
// 👆 false parameter is required for react project
}, [])
// Opening right sidebar for notification
const openNotification = () => {
dispatch(openRightDrawer({header : "Notifications", bodyType : RIGHT_DRAWER_TYPES.NOTIFICATION}))
}
function logoutUser(){
localStorage.clear();
window.location.href = '/'
}
return(
// navbar fixed flex-none justify-between bg-base-300 z-10 shadow-md
<>
            {/* Menu toggle for mobile view or small screen */}
{pageTitle}
{/* Multiple theme selection, uncomment this if you want to enable multiple themes selection,
also includes corporate and retro themes in tailwind.config file */}
{/* */}
            {/* Light and dark theme selection toggle */}
{/* Notification icon */}
{/* */}
{/* Profile icon, opening menu on click */}
{/*